Skip to content

Commit

Permalink
Merge pull request #59 from cheehongsg/master
Browse files Browse the repository at this point in the history
incorrect value or segfault for long-read data
  • Loading branch information
Griffan authored Feb 21, 2024
2 parents 1e1d98d + 539ba69 commit b0c3b40
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 0 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ add_test(NAME myTest3
add_test(NAME myTest4
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND sh -c "diff resource/test/expected/result.Ancestry result.Ancestry && rm result.Ancestry result.selfSM")
add_test(NAME myTest5
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND VerifyBamID --DisableSanityCheck --PileupFile resource/test/test.LongRead.pileup --SVDPrefix resource/test/hapmap_3.3.b37.dat --Reference resource/test/chr20.fa.gz --NumPC 2)
add_test(NAME myTest6
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND sh -c "diff resource/test/expected/test.LongRead.pileup.Ancestry result.Ancestry && rm result.Ancestry result.selfSM")

#add_test( NAME myTestPlot
# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
Expand Down
45 changes: 45 additions & 0 deletions SimplePileupViewer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,45 @@ static std::string ParsePileupSeq(std::string seq, std::string refAllele)
}
return newSeq;
}

// adapted from ParsePileupSeq(..) above to ensure both bases and quals are synchronized
static void ParsePileupSeqBasesOnly(std::string seq, std::string qual, std::string refAllele, std::string &pseq, std::string &pqual)
{
pseq.clear();
pqual.clear();
for(int i=0, iq=0; i!=seq.size(); ++i) {
if(seq[i]=='+' or seq[i]=='-') {
int tmpIndex=i+1;
while(tmpIndex != seq.size() and std::isdigit(seq[tmpIndex])) {
tmpIndex++;
}
int digitLen=tmpIndex-(i+1);
int clipLen=std::stoi(seq.substr(i+1,digitLen));
i+= digitLen + clipLen;
} else if(seq[i] == '^') {
i+=1;
}
else if(seq[i]=='.' or seq[i]==',') {
//pseq += refAllele[0]; // FIXME: upper/lower case
pseq += seq[i];
pqual += qual[iq];
++iq;
} else if(seq[i] == 'A' or seq[i]== 'G' or seq[i] =='C' or seq[i] == 'T' or seq[i]=='N'
or seq[i] == 'a' or seq[i]== 'g' or seq[i] =='c' or seq[i] == 't' or seq[i]=='n') {
pseq += seq[i];
assert(iq<qual.length());
pqual += qual[iq];
++iq;
} else if(seq[i]=='*' or seq[i]=='#') {
// skip deletion ; not modeled (just like insertion)
// pseq += seq[i];
// pqual += qual[iq];
++iq;
}
}
assert(pseq.length()==pqual.length());
}

int SimplePileupViewer::ReadPileup(const std::string &filePath) {

int globalIndex=0;
Expand Down Expand Up @@ -738,6 +777,12 @@ int SimplePileupViewer::ReadPileup(const std::string &filePath) {
}
}
// seq=ParsePileupSeq(seq,refAllele);
std::string pseq;
std::string pqual;
ParsePileupSeqBasesOnly(seq, qual, refAllele, pseq, pqual);
seq = pseq;
qual = pqual;
depth = pqual.length(); // count only SNPs, not indels

if(bedTable.find(pChr)==bedTable.end())
continue;
Expand Down
3 changes: 3 additions & 0 deletions resource/test/expected/test.LongRead.pileup.Ancestry
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
PC ContaminatingSample IntendedSample
1 0.109493 -0.0392594
2 0.0961996 -0.0553155
2 changes: 2 additions & 0 deletions resource/test/expected/test.LongRead.pileup.selfSM
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#SEQ_ID RG CHIP_ID #SNPS #READS AVG_DP FREEMIX FREELK1 FREELK0 FREE_RH FREE_RA CHIPMIX CHIPLK1 CHIPLK0 CHIP_RH CHIP_RA DPREF RDPHET RDPALT
DefaultSampleName NA NA 9787 NA 33.2308 0.419446 -388.883 -424.863 NA NA NA NA NA NA NA NA NA NA
13 changes: 13 additions & 0 deletions resource/test/test.LongRead.pileup
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
20 76962 T 27 Cc,.,cC.cC,C.+2GC.+2GC,,CcC.CcCCCcc ?K@B6D>F;GP5..99@<DJ:6H??@@
20 419067 C 40 .t.,T...t.,T.TTTTttT.-11CTAATTTGCCT.Tt.Tt.tTt..,..,..t =:B;JCGF1HAFA@DGG;5CC7G/DJKHAG7EGC=@=?FG
20 456452 T 22 C.,ccC,CCc,-10aacattgacaCCcC.,.C,cC 1A>HH9E=BD;EBAB<CH>>E5
20 698987 G 36 .TT*T,Tt,..+10TTTTTCTTTA..,,tT.,TTT.,tttTTtt,T.tT F=G7KVDLHKG77NN{JED@@@K3KMMI@5NA8F;K
20 828791 C 47 ,TT,Tt.t..,.t.T..,t,TT.T.,TtT.-7ACCCAGGTt.tt...t...,,,tT 2EGCAEDE@MHEA?=IEFC9G>A?EB;BA:H>HE;A1E1=E52GDDD
20 2166427 T 38 CC.,.C.CC,,Cc.,C,ccaC,...,Cc,,C.,.a.,+11aaaaaaaaaaa, ;8>8>EF7C>>F0I3C:;7?E2;EH7676.BE1DDF.B
20 2681099 A 23 g,.,g,.g..,GgGgGggGG.g^], CD<IE<?I9<CA1;CCH2:65H;
20 2799807 A 43 ,.g.,gg,,+8tatatttcg.GGG.,,g,GGGgGGggG.gG..,,G,.GgGGg CHKC2:4E{HF8EHBGGED99J=GLJNB?F{==DFMFB{D{N4
20 3189982 G 33 ***a.,+1aT.-1AAA,-1a,-1a,+2aa*T**,+4aaaa.-1A.+2AA.,-2aaAaA*..-1A*.-1A,+1a,+1aT 2?=M5G>188FF<.BFF@<<>I7C;0;>;@DD;
20 4390249 G 45 ,a...A,A,.,..,.Aaa.,.A,-12atgactactttca.,Aa.,.aaA.,A.A.,aaaa E9255FDB3K9GBCK@C;GGGD0BJGBGC4BL9H=GCH9GFCENK
20 4544193 G 31 ..A*..+9AAGAAGAAAAA.,,a.aA.*,.a.*,.A,.,A.. 8=C2C7>D=5C2B=;?>?D4/1<9>87:CA1
20 12484919 C 29 ,.-3TTT.-2TT,-5ttttt*.+1T,-1tT.*T*t,.-1TT,ttt*,*t,.-1T.-1T,,-1t C/5KGAL:D;?DI;A.;:II9G4GHF5DB
20 49870445 T 37 ,.+6GTGAGA,.+4GTGA.,.+2GA.,.a.,,.,.a.-2GAAa,,a,a.+4GAGA**aa.,,.,. @{H{{A{{A{{{GG{J{{{2{FD{J{{52{{{HE{E{

0 comments on commit b0c3b40

Please sign in to comment.