From a29c3c080771aff6130dc350fe6451a027d12956 Mon Sep 17 00:00:00 2001 From: dawe Date: Tue, 28 May 2013 10:07:00 +0200 Subject: [PATCH 1/3] Added extension option to multiBamCov. In principle this should count library fragments instead of alignments --- src/multiBamCov/multiBamCov.cpp | 28 ++++++++++++++++++++++++++-- src/multiBamCov/multiBamCov.h | 3 ++- src/multiBamCov/multiBamCovMain.cpp | 15 ++++++++++++++- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/multiBamCov/multiBamCov.cpp b/src/multiBamCov/multiBamCov.cpp index 920cbf96..f6bac72a 100644 --- a/src/multiBamCov/multiBamCov.cpp +++ b/src/multiBamCov/multiBamCov.cpp @@ -22,7 +22,7 @@ MultiCovBam::MultiCovBam(const vector &bam_files, const string bed_file, bool keepDuplicates, bool keepFailedQC, bool obeySplits, bool sameStrand, bool diffStrand, float overlapFraction, - bool reciprocal) + bool reciprocal, bool extendOneRead) : _bam_files(bam_files), _bed_file(bed_file), @@ -34,7 +34,8 @@ _obeySplits(obeySplits), _sameStrand(sameStrand), _diffStrand(diffStrand), _overlapFraction(overlapFraction), -_reciprocal(reciprocal) +_reciprocal(reciprocal), +_extendOneRead(extendOneRead) { _bed = new BedFile(_bed_file); LoadBamFileMap(); @@ -157,6 +158,8 @@ void MultiCovBam::CollectCoverage() (_diffStrand && strands_are_same) || (al.MapQuality < _minQual) || (duplicate) || + (_extendOneRead && al.IsSecondMate()) || + (_extendOneRead && !al.IsProperPair()) || (failedQC) ) { @@ -185,6 +188,27 @@ void MultiCovBam::CollectCoverage() counts[bamFileMap[al.Filename]]++; } } + else if (_extendOneRead) { + int al_end = al.InsertSize + al.Position; + CHRPOS s = max(al.Position, (int) bed.start); + CHRPOS e = min(al_end, (int) bed.end); + CHRPOS aLength = (bed.end - bed.start); + CHRPOS bLength = (al_end - al.Position); + int overlapBases = (e - s); + float aOverlap = + ( (float) overlapBases / (float) aLength ); + float bOverlap = + ( (float) overlapBases / (float) bLength ); + + if ( aOverlap >= _overlapFraction) + { + if (!_reciprocal) + counts[bamFileMap[al.Filename]]++; + else if (bOverlap >= _overlapFraction) + counts[bamFileMap[al.Filename]]++; + } + + } else { // break alignment into discrete blocks, bedVector bed_blocks, hits; diff --git a/src/multiBamCov/multiBamCov.h b/src/multiBamCov/multiBamCov.h index d0f96d80..87b66021 100644 --- a/src/multiBamCov/multiBamCov.h +++ b/src/multiBamCov/multiBamCov.h @@ -36,7 +36,7 @@ class MultiCovBam { bool keepDuplicates, bool keepFailedQC, bool obeySplits, bool sameStrand, bool diffStrand, float overlapFraction, - bool reciprocal); + bool reciprocal, bool extendOneRead); // destructor ~MultiCovBam(void); @@ -62,6 +62,7 @@ class MultiCovBam { bool _diffStrand; float _overlapFraction; bool _reciprocal; + bool _extendOneRead; map bamFileMap; diff --git a/src/multiBamCov/multiBamCovMain.cpp b/src/multiBamCov/multiBamCovMain.cpp index 798a5a0c..5e806e75 100644 --- a/src/multiBamCov/multiBamCovMain.cpp +++ b/src/multiBamCov/multiBamCovMain.cpp @@ -46,6 +46,7 @@ int multibamcov_main(int argc, char* argv[]) { float overlapFraction = 1E-9; bool haveFraction = false; bool reciprocalFraction = false; + bool extendOneRead = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -121,11 +122,21 @@ int multibamcov_main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-S", 2, parameterLength)) { diffStrand = true; } + else if (PARAMETER_CHECK("-E", 2, parameterLength)) { + extendOneRead = true; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; } + + if (extendOneRead && (sameStrand || diffStrand)) + cerr << endl << "*****ERROR: read extension cannot be used with strand constrains" << endl; + if (extendOneRead && obeySplits) + cerr << endl << "*****ERROR: read extension cannot be used with split option" << endl; + + } if (!showHelp) { @@ -134,7 +145,7 @@ int multibamcov_main(int argc, char* argv[]) { keepDuplicates, keepFailedQC, obeySplits, sameStrand, diffStrand, overlapFraction, - reciprocalFraction); + reciprocalFraction, extendOneRead); mc->CollectCoverage(); delete mc; } @@ -185,6 +196,8 @@ void multibamcov_help(void) { cerr << "\t-p\t" << "Only count proper pairs. Default counts all alignments with" << endl; cerr << "\t\t" << "MAPQ > -q argument, regardless of the BAM FLAG field." << endl << endl; + cerr << "\t-E\t" << "Extend one read (R1) to the extent of insert size" << endl << endl; + // end the program here exit(1); From 95cf1e0d1ac9ded2daad48c74037df75c2bca530 Mon Sep 17 00:00:00 2001 From: dawe Date: Tue, 28 May 2013 10:20:43 +0200 Subject: [PATCH 2/3] If R1 is on the reverse strand, the extension should be negative --- src/multiBamCov/multiBamCov.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/multiBamCov/multiBamCov.cpp b/src/multiBamCov/multiBamCov.cpp index f6bac72a..4fd9413f 100644 --- a/src/multiBamCov/multiBamCov.cpp +++ b/src/multiBamCov/multiBamCov.cpp @@ -189,7 +189,12 @@ void MultiCovBam::CollectCoverage() } } else if (_extendOneRead) { - int al_end = al.InsertSize + al.Position; + int al_start = al.Position; + int al_end = al.InsertSize + al_start; + if (al.IsReverseStrand()) { + al_end = al.Position; + al_start = al_end - al.InsertSize; + } CHRPOS s = max(al.Position, (int) bed.start); CHRPOS e = min(al_end, (int) bed.end); CHRPOS aLength = (bed.end - bed.start); From 84d97bdfc2c7ff440c2bbf116376dd0bd35e51a5 Mon Sep 17 00:00:00 2001 From: dawe Date: Tue, 28 May 2013 10:22:59 +0200 Subject: [PATCH 3/3] Correct handling of R1 on reverse strand --- src/multiBamCov/multiBamCov.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/multiBamCov/multiBamCov.cpp b/src/multiBamCov/multiBamCov.cpp index 4fd9413f..1ec88e9e 100644 --- a/src/multiBamCov/multiBamCov.cpp +++ b/src/multiBamCov/multiBamCov.cpp @@ -190,11 +190,11 @@ void MultiCovBam::CollectCoverage() } else if (_extendOneRead) { int al_start = al.Position; - int al_end = al.InsertSize + al_start; - if (al.IsReverseStrand()) { - al_end = al.Position; - al_start = al_end - al.InsertSize; - } + if (al.IsReverseStrand()) + al_start = al.MatePosition; + + int al_end = al.InsertSize + al_start; + CHRPOS s = max(al.Position, (int) bed.start); CHRPOS e = min(al_end, (int) bed.end); CHRPOS aLength = (bed.end - bed.start);