diff --git a/src/multiBamCov/multiBamCov.cpp b/src/multiBamCov/multiBamCov.cpp index 920cbf96..1ec88e9e 100644 --- a/src/multiBamCov/multiBamCov.cpp +++ b/src/multiBamCov/multiBamCov.cpp @@ -22,7 +22,7 @@ MultiCovBam::MultiCovBam(const vector &bam_files, const string bed_file, bool keepDuplicates, bool keepFailedQC, bool obeySplits, bool sameStrand, bool diffStrand, float overlapFraction, - bool reciprocal) + bool reciprocal, bool extendOneRead) : _bam_files(bam_files), _bed_file(bed_file), @@ -34,7 +34,8 @@ _obeySplits(obeySplits), _sameStrand(sameStrand), _diffStrand(diffStrand), _overlapFraction(overlapFraction), -_reciprocal(reciprocal) +_reciprocal(reciprocal), +_extendOneRead(extendOneRead) { _bed = new BedFile(_bed_file); LoadBamFileMap(); @@ -157,6 +158,8 @@ void MultiCovBam::CollectCoverage() (_diffStrand && strands_are_same) || (al.MapQuality < _minQual) || (duplicate) || + (_extendOneRead && al.IsSecondMate()) || + (_extendOneRead && !al.IsProperPair()) || (failedQC) ) { @@ -185,6 +188,32 @@ void MultiCovBam::CollectCoverage() counts[bamFileMap[al.Filename]]++; } } + else if (_extendOneRead) { + int al_start = al.Position; + if (al.IsReverseStrand()) + al_start = al.MatePosition; + + int al_end = al.InsertSize + al_start; + + CHRPOS s = max(al.Position, (int) bed.start); + CHRPOS e = min(al_end, (int) bed.end); + CHRPOS aLength = (bed.end - bed.start); + CHRPOS bLength = (al_end - al.Position); + int overlapBases = (e - s); + float aOverlap = + ( (float) overlapBases / (float) aLength ); + float bOverlap = + ( (float) overlapBases / (float) bLength ); + + if ( aOverlap >= _overlapFraction) + { + if (!_reciprocal) + counts[bamFileMap[al.Filename]]++; + else if (bOverlap >= _overlapFraction) + counts[bamFileMap[al.Filename]]++; + } + + } else { // break alignment into discrete blocks, bedVector bed_blocks, hits; diff --git a/src/multiBamCov/multiBamCov.h b/src/multiBamCov/multiBamCov.h index d0f96d80..87b66021 100644 --- a/src/multiBamCov/multiBamCov.h +++ b/src/multiBamCov/multiBamCov.h @@ -36,7 +36,7 @@ class MultiCovBam { bool keepDuplicates, bool keepFailedQC, bool obeySplits, bool sameStrand, bool diffStrand, float overlapFraction, - bool reciprocal); + bool reciprocal, bool extendOneRead); // destructor ~MultiCovBam(void); @@ -62,6 +62,7 @@ class MultiCovBam { bool _diffStrand; float _overlapFraction; bool _reciprocal; + bool _extendOneRead; map bamFileMap; diff --git a/src/multiBamCov/multiBamCovMain.cpp b/src/multiBamCov/multiBamCovMain.cpp index 798a5a0c..5e806e75 100644 --- a/src/multiBamCov/multiBamCovMain.cpp +++ b/src/multiBamCov/multiBamCovMain.cpp @@ -46,6 +46,7 @@ int multibamcov_main(int argc, char* argv[]) { float overlapFraction = 1E-9; bool haveFraction = false; bool reciprocalFraction = false; + bool extendOneRead = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -121,11 +122,21 @@ int multibamcov_main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-S", 2, parameterLength)) { diffStrand = true; } + else if (PARAMETER_CHECK("-E", 2, parameterLength)) { + extendOneRead = true; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; } + + if (extendOneRead && (sameStrand || diffStrand)) + cerr << endl << "*****ERROR: read extension cannot be used with strand constrains" << endl; + if (extendOneRead && obeySplits) + cerr << endl << "*****ERROR: read extension cannot be used with split option" << endl; + + } if (!showHelp) { @@ -134,7 +145,7 @@ int multibamcov_main(int argc, char* argv[]) { keepDuplicates, keepFailedQC, obeySplits, sameStrand, diffStrand, overlapFraction, - reciprocalFraction); + reciprocalFraction, extendOneRead); mc->CollectCoverage(); delete mc; } @@ -185,6 +196,8 @@ void multibamcov_help(void) { cerr << "\t-p\t" << "Only count proper pairs. Default counts all alignments with" << endl; cerr << "\t\t" << "MAPQ > -q argument, regardless of the BAM FLAG field." << endl << endl; + cerr << "\t-E\t" << "Extend one read (R1) to the extent of insert size" << endl << endl; + // end the program here exit(1);