From 2efd1c7e6baf5402782f07682cf59132487bcc44 Mon Sep 17 00:00:00 2001 From: Zilong-Li Date: Thu, 26 Sep 2024 09:38:12 +0200 Subject: [PATCH] can setRegion/getStatus multiple times without potential leaks --- news.org | 8 ++ test/bcf-reader.cpp | 70 +++++++++++++---- test/test-no-index.vcf.gz | Bin 0 -> 23728 bytes vcfpp.h | 161 +++++++++++++++++++++++++++----------- 4 files changed, 181 insertions(+), 58 deletions(-) create mode 100644 test/test-no-index.vcf.gz diff --git a/news.org b/news.org index 31dc2ed..50ebf90 100644 --- a/news.org +++ b/news.org @@ -1,4 +1,12 @@ #+title: News and Changes +* v0.6.0 +- add =BcfReader::getStatus= +- fix =BcfReader::getVariantsCount= +- improve memory safety + +* v0.5.2 +- throw errors if a query region doesn't exist + * v0.5.1 - add =BcfHeader::updateSamples= diff --git a/test/bcf-reader.cpp b/test/bcf-reader.cpp index 78fe96c..11fe014 100644 --- a/test/bcf-reader.cpp +++ b/test/bcf-reader.cpp @@ -1,5 +1,6 @@ #include "../vcfpp.h" #include "catch.hh" +#include using namespace vcfpp; using namespace std; @@ -68,7 +69,7 @@ TEST_CASE("parse PL in vcf - vector", "[bcf-reader]") { string vcffile{"test-GL.vcf.gz"}; BcfWriter bw(vcffile, "VCF4.2"); - bw.header.addContig("chr20"); + bw.header.addContig("chr20"); bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); bw.header.addFORMAT("GT", "1", "String", "Genotype"); bw.header.addFORMAT("PL", "G", "Integer", "List of Phred-scaled genotype likelihoods"); @@ -78,8 +79,8 @@ TEST_CASE("parse PL in vcf - vector", "[bcf-reader]") BcfReader br(vcffile); BcfRecord var(br.header); vector pl; - REQUIRE(br.getNextVariant(var)==true); - var.getFORMAT("PL",pl); + REQUIRE(br.getNextVariant(var) == true); + var.getFORMAT("PL", pl); for(auto g : pl) cout << g << endl; } @@ -87,18 +88,19 @@ TEST_CASE("parse GL in vcf - vector", "[bcf-reader]") { string vcffile{"test-GL.vcf.gz"}; BcfWriter bw(vcffile, "VCF4.2"); - bw.header.addContig("chr20"); + bw.header.addContig("chr20"); bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); bw.header.addFORMAT("GT", "1", "String", "Genotype"); bw.header.addFORMAT("GL", "G", "Float", "List of log scale genotype likelihoods"); for(auto & s : {"id01", "id02"}) bw.header.addSample(s); // add 3 samples - bw.writeLine("chr20\t2006060\trs146931526\tG\tA\t100\tPASS\tAF=0.02\tGT:GL\t0/1:-323.03,-99.29,-802.53\t1/1:-133.03,-299.29,-902.53"); + bw.writeLine("chr20\t2006060\trs146931526\tG\tA\t100\tPASS\tAF=0.02\tGT:GL\t0/" + "1:-323.03,-99.29,-802.53\t1/1:-133.03,-299.29,-902.53"); bw.close(); BcfReader br(vcffile); BcfRecord var(br.header); vector gl; - REQUIRE(br.getNextVariant(var)==true); - var.getFORMAT("GL",gl); + REQUIRE(br.getNextVariant(var) == true); + var.getFORMAT("GL", gl); for(auto g : gl) cout << g << endl; } @@ -106,7 +108,7 @@ TEST_CASE("parse vcf with multialleles - vector", "[bcf-reader]") { string vcffile{"test-multialleles.vcf.gz"}; BcfWriter bw(vcffile, "VCF4.2"); - bw.header.addContig("chr20"); + bw.header.addContig("chr20"); bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); bw.header.addFORMAT("GT", "1", "String", "Genotype"); for(auto & s : {"id01", "id02", "id03"}) bw.header.addSample(s); // add 3 samples @@ -116,7 +118,7 @@ TEST_CASE("parse vcf with multialleles - vector", "[bcf-reader]") BcfReader br("test-multialleles.vcf.gz"); BcfRecord var(br.header); vector gt; - REQUIRE(br.getNextVariant(var)==true); + REQUIRE(br.getNextVariant(var) == true); auto l2 = var.asString(); REQUIRE(l2 == l1 + "\n"); var.getGenotypes(gt); @@ -127,7 +129,7 @@ TEST_CASE("parse EV in vcf - vector", "[bcf-reader]") { string vcffile{"test-GL.vcf.gz"}; BcfWriter bw(vcffile, "VCF4.2"); - bw.header.addContig("chr20"); + bw.header.addContig("chr20"); bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); bw.header.addFORMAT("GT", "1", "String", "Genotype"); bw.header.addFORMAT("EV", "G", "String", "Classes of evidence supporting final genotype"); @@ -137,10 +139,10 @@ TEST_CASE("parse EV in vcf - vector", "[bcf-reader]") BcfReader br(vcffile); BcfRecord var(br.header); vector ev; - REQUIRE(br.getNextVariant(var)==true); - var.getFORMAT("EV",ev); - REQUIRE(ev[0]=="RD"); - REQUIRE(ev[1]=="SR,PE"); + REQUIRE(br.getNextVariant(var) == true); + var.getFORMAT("EV", ev); + REQUIRE(ev[0] == "RD"); + REQUIRE(ev[1] == "SR,PE"); } TEST_CASE("throw error when file is not valid", "[bcf-reader]") @@ -150,3 +152,43 @@ TEST_CASE("throw error when file is not valid", "[bcf-reader]") BcfReader bw; CHECK_THROWS(bw.open("ff://no-access.vcf.gz")); } + +TEST_CASE("throw error if the region is not valid", "[bcf-reader]") +{ + BcfReader * br = nullptr; + try + { + br = new BcfReader("test.vcf.gz", "XXXX"); + delete br; + } + catch(exception & e) + { + cout << e.what(); + } +} + +TEST_CASE("can check the status of a region", "[bcf-reader]") +{ + int status; + BcfReader br("test.vcf.gz"); + status = br.getStatus("chr21:5030089-5030090"); + REQUIRE(status == 0); // valid but empty + status = br.getStatus("chr21:5030089-"); + REQUIRE(status == 1); // valid and not empty + status = br.getStatus("chr22:5030089"); + REQUIRE(status == -2); // not valid or not found in the VCF + BcfReader br2("test-no-index.vcf.gz"); + status = br2.getStatus("chr21"); + REQUIRE(status == -1); // no index file found +} + +TEST_CASE("can count the number of variants in a valid region", "[bcf-reader]") +{ + // BcfReader br("test.vcf.gz", "chr21:5030089-5030090"); + int nVariants = -1; + BcfReader br("test.vcf.gz"); + nVariants = br.getVariantsCount("chr21:5030089-5030090"); + REQUIRE(nVariants == 0); + nVariants = br.getVariantsCount("chr21:5030089-"); + REQUIRE(nVariants == 13); +} diff --git a/test/test-no-index.vcf.gz b/test/test-no-index.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..9438b0c3de77759dde856c75801735cdc18f38cd GIT binary patch literal 23728 zcmZsBWmHt}7w!xU-Cfe%k}3!cB}kW~fRaOtbmK@VBB+!gFm(3-N((3=0z-ExNQX!W z2;6u4{n!0)*Y$&I=A3=@yW`o<-up1@@q{o4_TLpgL`4+`@{=2FZXNLbCUsbjt)cTl z&3wyU)5}I@`IgwbDlzp@Je@@WKY2B#@FrZiKkzbj{XM%?S9|)HCOG(P0Df=2ynICT zZGF99|0AK5cbnyFTf1Ft=!l#0TgPWVW|zlf7Y4@dou%bEl0tWsiUx&T${qqVEf%Lh(KrJlB53Vj3;K__J0#{76JF(RiE8)AQWW_ih|LJ!e|f zU;CzS=JaxS(f1_==yj^$W^a?0air3Ti5#iY<6%d@xFe?1_UY9qc_?@6 z>iy<$MiXDQn3JqB7kQ-|isz@W)f7KPo7KSa0oXV}nAe$)iquzokteS{3l@|-jJdyl z?7(pwi}J3{2} zENggLhNdy$hZ6N0R{2}&tsT^Ldy*Bm+@<*fOlLGcvv%&-ncESY-N##>EuZw5fAJGe zJC)hgdCsL|uH0eMIbXXYeP8NWo^NeGT}wyIL`O=8!#?M_eJSN#zfd1SmNn|2H;BjoI zll#Uc&@}COkDyLTmmfu4*#6dw6wlM~Kl4q$7~K9Q@Cl|6_25&#c|F0F>BUrXg~R)E zoPqeKFP@+^hYn?sl=4aE9T~<_GwqYm!+h_%J_+=r&1Nr7l(|w{gY_=!tq%Pk`Fy3m zW4ZFv(*r=O5ccq)d$k9-DeZ?B!juZ1!kr3Kd{eDmxW zSh9TN_>w&FhMh#hcvy^UI?2-(TGS&x?>k3>hjX}lzn-b-Fy>g3ywh8}v&aK?mo)$O z0@nzy#CKZYuHOUF>d!$<8h6{hF^mShE`gSba#g6fRl^alL6io&!947fD@0;3qwI6i^iEirhBVCYiAct z%sCdlMB(SorV^C{r%}?)&EJDxXrE*>v(FC?Re%;M9)Z89>WjqtB0Bo_(J`9{SRa1Y<-Kt zZMJgvZ-2uxv1pSSom;=D5fc_20Zn3Ctp^8R<9UtlO_VEovNxXD;pJg?cOG*))BQ5B zTw^+Tt5@xQ!yw7B`v#MGPij}aTp|Q-S znep)Z%g4vT`6~~_%ZIRQ{o>sw@QB>Li|_N12>&7L)pb5syLXspbDhMN(ma_Q{BcNA zDp5C@l~n>L;FEV*bGW z{bufuU{Q&Lxm^2@CtH@H5)Wxq-*wh@H_9c<*{oYV@oLzPn=jdEsoJs($54;P?VLfM zF1c~5BT0yxTwG%HxqiiTBT1|>ru}8 z$tREA&ND?t^L@>Ukvlb>2mOyqL(5>^x(nH~xXnVCC^5~V;%o1?<)|K?Lh{kRNm;9iZS zQu;I_@qk5{bC+df!2Dz3B@0xFPufwUNS~?c`q@<`q=@a7{O#kQZ%m^k@o%nAV@f;T zl9?vZ%HeQK6(x^2;q>5WYk!WMvY^l8oG0oc=GD47iho2osls}=s&uty8z*|P7BEu1 ztTV=*_gI#u!OKw4jIWtJk4?uUr}=sQ-(@TRu>NKenv~d*b!xug?61vI!x#1sN^ugy z@A1A#E-s5PLV8XANo}~P-rAYN9H%BR97`Ph_xf;gci@O^Wh{<%DdKV;sjHTwhbrn) zyXdVR`S?6mK<9c##Jz|=B;!XCjzXnnLriZrbPjJ-_)k@zsooQ|Hu>|8JshEPSn2^A zJ?_z`iz`tGZ=l^>Z+HB?G1tr>)g44AKj+q_b|U2|Ft^dBZl=I$`1)d#;$*Np{Ajv% zrYro&<+@e;vrBU-^32BbUUUPk}mMUvG~|tB5r`T zq_{-1s8h=6>$Hhk78Bn((5pciae;^*g4B`agOtU)Zk>In;|CQy5o2a_kyPu!^eR z-HAPVNnh^pDV#4?yF}r*+uvU$p)Y&w#ezBk7h7E(`}d(Vma8wg+)=?lrC>jwQ7B#= z^{?feDd-L@*9%6#Al|-aOWN`ISILW)vsY&wiyz)}Z+^GB{F^)(Mq6p?b$VoMW9kv8 zzr@=iemeIj>awZhvU?<4-12X-S!^tw_-XTs<3``mLGxv)(&x2zS@G+8N)Zv_^A#<} z#;q`Za`Dr!(!J&(yKZr_fFV6qXA#>2@63Mkitin(;urRg_*5NtG{#@<4jrsDriz6! z{BW#nY4J4G32SSKZ_4mG4eMWf&2`YESXW^lQQucBG`E;#x%T&PP|i*zW^K31QfO=U z!%Fw+wY>1T1wIzXhsU#vSN_`-Z4qVS50N+(VHe?+Bz(xZ9nlWY03Qp3oa$d6&NH`Q z!~DCKF|6WT5pT^D=*Bq~&TgF4Mmz1DpKP67H0Vz4EnYqvOIgnePfV0AJ832FEl;>I zGWl~b`N#H$y*Db%wj<}{Pdx3+Kzr5A?LQk{@)WoGL#{3rSOe(g!a}ze|1LJwE-p4* z-uq(Xb?RxG4sToC`!o}l8}>PTdMn#Dc97t@|d0H7c$xY7vZj` zxrbNBlM%_2draf{0rJm^CaqZae$KFmFILQL5u0hC6$KAlE=5nN)cgCba_PNf=+7@g zZg*evfG%__ri$N1=O|R4KIT;lS0THX-ceP$(ky>X-jHBWJ2xb3QHXrCLvHTc81Aig zs~opT$<+Mq9npv&gY9T7+_k2w{j|czbKeSD zy1Lv!i3#e2&l5fA{;}KimsS5;BUKPM;u`;a%K9nfV6Exzhgcu}=T}~*^f~v2&upqs z{Q^z|e23CnZRg3Jq#Z7N_Xwg=UtSiC2i4`CO0={VLwk;I`Qqk&TPF*qk@yet zL~B(>%Y*rSb!6w_6<G{2Pd#ZMbY<#D_?w!9)PH10Ue zp5|Paz`rm0N9(@8XxjXDe0m}4xb_N+8}~C=`$e+3__u}TujqFV_4V%Z;9B0|rD*lO zz~6R#=!U=3t&m^XTA~#|s(nLnqRV<%ZeL}l>96BjSdRh4w)W-Ak4BGQLq)%=NupyS znlNQNsr#IREC}_k?_o6|IqiBM`}RGTt0ZId!Y;N@Fa7-`o5;l&%=PzC?fx%jXTR6Tn3J4*U$UXDh%PU3 zeoN(>Tt&3pxTNrO(kia?oZ~0X&OW|aKdbOfO^^-&nSkq^o-I4iqtEwcw3?(+&J6MI zlV|(k^mR1a^j^9sInOC>FKaYDMNvP^3dgNX_aWC(T&@=@Tr9HE5#qgA2Mk#-RKYBtO{E-#!pP~(F5kR zp@b)<8-uC-9u%?_6IPL1ciSS%DKv1Zw!BXJ&2F?`2pw&Hv=;jk_TXC}?oOw(U}(f8 zzGZf{b{4b}*7zHvxERz-)coO4G@_-<;+?+rpEjYep~e@TdwsUnac)V{q7wUsM)IS2 zf996S=H5J9Q@#c8tS~!r&BOAZjrIGk6^V4|l2&F!${!j}()Yzkt6aPjtNe2RYV86f zB*-JYYskr3?}J+wLHmP0yDIW?H`TLw8(B+sg}m|6Crbj()SAT zslyyOZR;IL#UL9m__Ry4Dnp-ot%u=y<2I->VaP2{WQk2!3URj};wVc(pW^PBK_PqA z$G`Z?a|(qw>)TiMveIvcQvOQyiti}tY$3HhXxYHsOFz@T6brl40xgb{W|(&`3IeWg6kG%to+XPTKaeWwi_muvt9nKQm5xN@BR}hO6i|i`VRnjSiyu8!DWayQ}rfc|U`=|HZnM(a_%45o7yinEhwT!6B zxy*g7k2fAoLX7WaL7TQ5lZWZI@xOi%MUP~^^YYc&6hD=B*d|lrJS0Cf8~o{fOK0X_ zyfZ5i;qschwpl;c+LZ;t+Mqqdj+bMF3hI!lWy0leL(aBC6<(?$5ugQe9B zFI1N9E^4$lxQduP`$Dm*BjVN1c4fMUf;rYh8C!4nc&Z$Bx{R*faN4lSzlSrlz9=+r zSkYzBw_o|%R%mtao=%>Tyua02r`0#Fl--*__c3S3rY=oH0U~cod78AeDVk6BO-tva z=A3IC_U;R%q=r{0TvUy$R-|xl{T{D+-(Ti!CwzX1=hj&$gWOJB!%e?*Xb z+rg-I%GICqt!KaWPj@2z4qaVFTrLLGoFDpMFi_rjz`mGxg;NLR`Wo2`Jv|)RVb~7| zKcT6y6S~M|xqvws!dYq*6*$lF%=exebe;grOo*TXs1Y4=H)1>REV#V+#bh#92Xd}^wxT9W2S_HVYDk@d*6 z0^2o4wDObXK3m)J@Z1fIXxME=)V$R^;{&qDwcm@78nh_>HaQG0ve4!J!TN8`6zIT6 z9fi#OCSRDbOMteQbJoJZNOKP(W)0o*5S}w?VN%$x#~y$$t?kCcTIjjK`!RC8^Dh2M z2VAY&T>FZkWp)^AWKlJaLg- z>FIBhFN#`Y*7X*_zsuk<+E$qDmgYh!K@=-x8TFUGGDeJ#!gvo@npzlPOM#3Qo)3q!e;Wx_EuYL5~G;#^`TcGUTuQA$SP zy!l6g;1nH?Vfu_h-KR`dW_|u5*r;_KKZlk%Y=YPsvH0v!Qnr=;zMC`J@4_B@I9MQb zbcEY=fWfRWc37$$%he0Z$;Bs*+R^&BHAYD2m{ zeInr+yB&5@%g?ol{D!OsnT~7YG8ryOL!BOD`WQ1QqUfcrS+WgZc_O>s1zzNK%eugm zgo)g+^*0&%x3=EQl$;z%z7@_=M(H$4r%;9Ihao!m{3H{zNFl}7d`(Z}-injEKZxs& zv^lqNpJZEbK6*Y78J>OgtL0lqq+)5>b>~e#$*VtwO}3G~DSxj+db~1jAIiNYw$=#A z(g_m7Y&p!{#Q4;`cmR?6P!I;9uN6x-)}Au(ew4ofb00O?Hx`96F!D1*0%dg;C519m zi_hA-Q~f5Z(4=rwUe}D)fPXf#Y;5gtIu^RL@>)!}M$eRV-6tcpE`PGGZYo1M z?9a4E<=6yK+2S4fc{A7kfk?IveI8u+r7pLzDZx6k35TF?kAwD$fm7 zpLrX8l0t3bk~iLKxPFqwgOghhRdeL?Ben7Vd1-14!pV-}$)%RpQTm&98v(~EZ`n)* z`fUVeooLrLnrc^WhUB`pjM8V6Cf23gP|n>3C=(yo-l#b~Aj+bgCUI;}lz=4bo2$HO zGjQ6-grazuzk*7C5PK@vIidmzhqX=+LhKlOoU89@J@!Z;l*I~J$MAIoK_b*i9t#cA z+nXX?pX?K&$L;i!l>0%OqUJ)f4v)uCxDb=TkNkc^ep1gj>)TX~WzOhbQIF0M>EL|1 z>0F^F_M56!khB-;?__<|O_(qYu7xqcxrc%l$SsFYjblVrZ$#}yK~7x^Q5JGamju1y zD0T$vf{K2h;^pvwILb;6!M7Bq=jG}`FxfsmDDu%yQpOqbd?6FMzWzF41V8P`H`edztsN^yXq{KCT<#_j>twhF`mq#5XH)kc4vCC4QZHxhdmn_d z-I@ub8z!3&)@DF|IN}QeO3cdlC|9S438S4?had?v9nCZf!i;|$h;-U4(AXg&cGMQY zIQtE>w_m9#Hcc0Z`)YPa%GF>iO{MHy?)ExSI7EUqd;< zTO_d0WbHai`ZeQ&lE4<2k7V$J%q6+$QagC${Z1x`P@Wd_kvC>qDE6zvTT+O;tLp~N zxrh~TX>j!uq-H4Zmc^4>S%Z<;VVUbru=f=1W0Y-yuYH$y93`#pT9!oHAc$}aPBKK5 z@xD3~qNANQ>)+rksm1GVZaPX9Hno+d_5IN;-E^!&J06%ZSnSa33;kGx!;wXq33Pn+XMmpX9P5vOYl$cO@-~ zSwcIN&RdzA@S+XG0a}|Fc3@L3(Nd^j{^!somA512pg?|(Su5wd zkGg}}e@PD*CU;vPK?f8|A9+_f!&k+tj|buq+OOM~CmxMetZ15XKWf>UHlv?)@1RTe zjH~Ep;oN^PI<;wE-Ffjk1N2|`+XbvqK7o%nB)8AESHcC{NWb>I`>0XnZ||PwAHO z($V){3d&1}hDG1!PqmRGV&dl z=97_>l*A-o#N7L1;?yu9uG<{AN&1w*1oDlrcCm)XNO+b#sYqLy<1M`t$cg&#RPiPz zoqA<6X@`g!q8p~Q|HtXY9Ty1mxDTWc_Qdk|{TS-&B=$rqnZ{(+x^&bgriASe{xs8@ zp#AoP7smN$)gZnva?(Fn@!HvfLy`WmpB>ol^T|t+-wJ#vFXp%V1i?6XrzkOx5!*WzGFx&}~$WYv= zNfF(_f^3)A(5MPkdK%bmyF&$Bw%R)-Aw6Za`b*l{hBPp?Y5F`vP zr@UmjRYtVQzxBiAX=LW32nY`UwhN7~oHR2-sH2C7Xxv)P6Jq$Aw5P=d4PpO1aD+?{ zg(jwNuHv+JrZ(^+0tD=loi~g%5~rZ3T74pv_m^^2XV(o8NES%R4v`V$6qXwyP)hU| zf_;vWrVRJCN-rk`Bw-VpnB>ht3^$%5S)#HRp@1goQJ(eUzUA05co_ze{N}oBq#cqx zast4A@a75;%CHRwBOSbCYV#+X11r2wC=dKpaYJ5%+iQ1nJGgIvs30n zhzW;cOUP?rMkNsrI|&QJrpn^+HO@WE`_SWzRrV+3k?zP&vkxcP9QzJ~VMt8n{6}tI zXkrc3Yb9LZ;lzDwe2z>G0)UwPIKeyXIBd1YVEq29$Q$W^Y}VM$h`CP>41~d*k+g6{ z<%!&DR*!+hGmttWIo9XiUp}5h7T;F+SU;l#fhJD9VG-OX%sGq7o*GQTS=UK|BJ?Tu z<6Lse*Ojh)<0B00VN8&|Bn9q#ej)&nED7A;(}b*GLFNW3i;vd{_f!P@xOH`8Nu>Ck06(~n03WmA4GSBV) zE(+!3(!h^Z4O@tD)JuYD@_&Ky(4e7cfI+&w7h%MZg#0w%1P+@-)h5C+VODdD2WRyF(KUjUl0yKtB$bya%!|H_blUouybyh&WcPN&a7#wJRO+P- z1Ysf$3@o{Xp`U2z$h*$saAW~n*k~O7ao{w}1Mnb#bge$NI6<%rofrTu4W3CQLB8;5 zg1iHwxIgw17om^2l)^wUH-n)ZSpX|o@chVNA%Pp`;DlNJwb#C4j}GT~0=a@f*#DoO z`8=b^nr|x;?WKxYo@>e-Qx5PuV*hhf@{YfIFY(K0hwB;-qCN)f^`KC;fhScYf$19= zUX?PVZp~-s6IO2ay%A>=SATwOiCN@P1`6LEbU~d^&lxwYAj1=Q2U+|!VJxiZ7fLO-d!Q(|qOG9PA-z3LWwu$8;8#y(5 zlDHk+B1b5BxSBaN)pBC+ITWaQ@>o^tk@P4Jgc$}(8v~TsctvL9-k>_yQ-unUk-;Ad z*!sTAB|VhdT-q2pl#_rEA+e^f4&WnLHtA@AtA@%%2*wO?Ik_S{1k+4p2B&7! zW%G~~yc{YAj*_OOlZN;KrwgOgfiNC|5vlggV;wTWZ4EyzuVq0w5mM&{Jd_tgKt@KN zs=csB;8@1cbBLbekCCy$EeSQF7DC_Q3y>*uHM0pqL{)Jy(#es{xfaSvaYgnsk-j?E zILA|cv(Rfhjl9)Qfr6kxjQS05GGh2p*U>huE`g^U2tyf|kAN1;F;IklMB`7F@ zNee!TfhEV#12HJDkg&!6?G8>I_mQbGtAP|PHgPTj&}fhtHdPXgBpT|{$4RA{2%`8R z4@#`shQu+Df<3DV$!7w&$cP-l;1uvK()mJM`n;fInra;}avUt9k!omrgvmc^I9NwB z(JyWWrpVgw`{}JEDwNA2lPa# ziLi`rN50Jc=i8_Oj+QD7<~kai!-f^C|1Emu_Y4S{u*c2gPs?Q^ldBCIvt-HFto#)&FovpX8@VtS*gsdi}Ly(|PQr z2={+q0-GKTKAws?1eYjKWSZ5ZP&An}-1J}pfTIn`L3Pf|9NCx)Lz4~=W9#Ri1Qz`* zdczQze-ds?%#ksyYrA;=w9@!MJ=Rnvx+2&tDeajdhO})bf8`TPTxY zWU;4Y07JBUT*2|=0;KAAWbkdE38^Nc(1xd}-45N@0M#o4 zfcDj^9A%>npvnwF0-)-ICkpSz;E#doJ^%-<$n8uBmEg|A;21D6)BRT#Fi7TE2 zNjAkKU#g6~K<$$wn2F&o0EVU*m^7&<{2{^&SkIEDCany87H5yukcY%aISN1vL1B?3 zrG}bO@^m>V`rii$8U{tjijk%T053sRpg^r#Ms`v1?^8e2*a1`lvca5rPV ztFHkBq~?MXG)Zb9WU4Jla@2KS=(OJq7M1D%m&Y>6pa%YLQbsoCapWqI|6{Y-M(`;1 z-v5P#i+wX0MsiG808)tMs1lY8Iy8V%3kXmt!$~+>)&$lw1Lo(%|8PYM_`(w)`d@V2 z2M^FBU$hpi@caPZ0*Hd5!j@5-2Lft|Z2+gx0^SZhS~7zhfC7lnG=|Fg$T}n+${u?k zOUk1dQPi+d9QCUh0jh3H3ARk6h5ywO**YU2|6D8hv`~o|wlm4`k$4m2a2=RG0D6k&znVYK&JwQDR4IGEb|^X-^ywQYG{9eu8e;g;2aLCLk9n2D!< z2M@C9uk{WGVJ3Q80tO#%qqp09QtjPdEZpj^wPSuxFz`O+bNnI8F_5y~eDyQ_F!l_5 zY#!O%z2H(B2{N4AI^@fsh}fT_5g&0rk+Lc1(B}sg#!uxfb7$|J<6N%B z3}BSsC>qOybB=l6lXgHHEIA9n4qy3^^FfQx6DqCX(mBn)OEirkYw?HQ7j96M<`lQs z`l46X)U`>xZF%-;4<{DX%-y~&SVp|kSva2ft}fX-V#Ol*Oe8egfP~4Wf zb}!nNM-)HUZB1QwI5Vz&NABo2&&7TdTi)VY3L}Ar0$&;YM0)^9v^}diF%RB4SJ&3x zGr@$tvYA7OzrK6CkcaCEN>z0@(6Ask^p%LpHrY>+>c5afe4o^l^;gI7jW)%<^GYW- zaO#q~->nWvkJ(D6<^oDxIF0k6U5!7Mz%uWa`Y_oJ!$uS6+S&e~3;27xEbh0E`N8KY zw^_nzKejKQIo>^w5f_DRvt)OJzG|Nr)?E!o7!q&j^u}=ea<5+EF`2%+!px2Po0u-VkV-m_-hYNAX zOT)GSGf(;5blQHjG->$k!A3W3*#t8^X$f$10o1)J!tosZllKiF;$Q0}~^ zjzRR&0q?vWne$l9nmO0w+ZuBL!46+CTN2LLs$v7;m!}ZuB&BxlHkFQ`$VN*Hv)wK& z);M<)$eX9F`X}s3Y^kuB)j<~49uU5VCP?Jo%u*sX-23Yp7rb^u6;rvgIJ$Z}6Q+q$FU5K2>sAY|K=!^{+b@Z3!6moASx04zcA7Ysn+s$(B zZ#uu7q&)iM8B;=1$M&q5LtoWGIcEY+0JrP)S>1WbNGI_~nI2Loj%zTw|VO=ddJa)gq=OThM8S;zsYSot&P|;nhBUc1SV7>d zsvoH#dzn*QGkIZd1;?h`7}>|*gz{)qm1;kwAM1#O;Up)MF=pn`xfBn=oDGdrMx9y&?iPS@^Vk=I_+{CiPaB z&YH5E4EtVzU zc75NkWDhXE*6!gN-uZpsk)#xBm$6~%m+Y6=Pu7XB9h@2L7R08|cEojVZx5tg_O{4@ z%{zSl;0rpf1Yd-FC=4{-ZiCh@e4^m7AyU=TykXJ-20sM_1jjHs*ozCT+6~+|qu&Al z+fSu~KoH7Pi;z#Az=1rw2X?YF)2Cq8oT4}i2nAyn2p$(sMIrH@*vl z5Z@d%h%hvUk@0xmz_vLVRSdlA(uBpn4I!$^l4QCErell|&(7h(U2U%piG{oU>u1{O zNrV?M15izUaqP&^Ksz0SDvY~q?s$`$dzF|_kjjrSYwIYI!@}c6E;gHq%?S^RZ zX`#>_$tT5Z9yn~|TuT7%w^Cw^9IPHJtM|Y058lsPB`|13kYaNI#AKzEmel+6*#m$! z+_=lBbzs_;;n%isQ^LUE$6KTPt%wgzY(yE~*4wDS$Kq@ooEb2*Nuwbf>9YT3Y4$GM zXBdKHl4qFenprSoCdE}DKoG#}hg6%2sQ)1bmKu!1PuITOz8B$hzzD}7{ppe&Fxlxk zZO8M13lv~6WYL26V+Rt{PEy-tx(Duv$~DkW@EjPLnuw8VIxNN%;C=}TNN`YnAQJ_H z7}!;nxnffP;?yzX@pOelBsr-3Ra2<`&B?l|V4#I2lx(-$2Ks|&J>b2dH$}gX+?M+o zpUMsgyCy85R)MLLL8`1qXv*X@O43 zqtcP;1Dyd&xy$;$RsErT2AKh4o|B(TT*kPy>R5#*aAbf|rhIhNicW1{(^m&vVgmP5 z3DfJp*=vAJY>Ztdy?#t-i{__@kF6?^z`x}SEbu)eui4?22Veqj=cGEDs*y)|V2>_< zY9=OwX%x%{fC<#r_h@9$*B*j-tJ6qTTVt$!A3IBfJtXzt7GX+3J$$73Z?m+^jY)lp zeHQ@8)fF)f)|U*hp#pSP?6Rq{U}Swk21{}aU$_80EvM;In8#ov#(H`~jZCyU7rV`Y zA>LY<0CU}!B#zl~_xjks1un!tXACpqS;8lSzV8xe`OaUp99in6sOUt>3U^? zadYg@|KB95GCKRV5=pG<#sAHMyu~l`x^rpPV`n~;Q(VRZn<`dnCbEdrHMo58S;oSQ zUmoxjD=dSa>%~CE$Q0nC)XZU0=+T(lOiXr`Nym|CeiYFbi12 zz%uZ93vG!D_R4tUbDF@9?;M*lPw{+GJ94j}KIBdO|GzhJpXlFZStqjIcpS5L;`K+b zhI@jb?>h%E$L#dflU(@VZUgEI_jIHP6Z5Ues|y_F4e6Hh&DMALp^mK-79#Y0b}~cj zKfJuM|DGoA@ zTP8SPDAlm;z2qQ5J?Bus1(uU=sQ*($g#1ht*Uu#IobZkTEmaioACJlqAL$BVLe4Oi z5Ey7AWj*;!q^Sjwff<6pctePQ)r881YP7J^I7XG4a+KQm@^j+yR5Bwmfjb%unBD%= z{Jeh46UTneGD0qbk3+8WX`%!lOAj2ix3VY;$X#)dO;}Ba8*0w$3O@>)0J()Nc+#yB zOLfenB$d6>I>$+T&c{gt+;OZu*H?c#_SpM$?z>%3 zQ@o5C6nGFeT_^LD@>YqdGNruc9o3{jLTkT}-&7xQRmN4xGS;|}a=vO594bjcgf=&3 z_9<^KwIs$#;v?^?nXn)^jw!pddz4vE3fMNul*d)+GA3`U)fDe?roTF?bFAFwIdtrQ zuCHR-SV5N^^^;qLNJWvWzzp^*_QAnJMXvW<*`aEbbN!V;ny4T#E??huh2NC2UV)la zEB&d@6ZLHyP24E$C_dtLrinHv1K$c|d13bm#y=9evZ)kKDK^K#jvd&9YWUfyp1;%= z{-nBI^)ULj8h9jEFv+fa+r#_ZJP=H|`2w3Ym4n-;_dwR!nRvlL6f9 zRQn-qR6%h<$SKuF*`d=$_`V;+t+X`aG2pWtskv=qvJ;u|yeg{-fFBiiWD+;Ho`C|A zkMZm%sG@pQ1*~L`sh|{)ADf&vKg{81QjICV0=Q)zgM#jW0*;Mg@LR93M;^<*3!&CmwxkG3K zG`Rx6%23eVyO{jW6u<^F2;Whk6l%)AK3MRSS8L_aJr6DEo-=S)U!&K6@VZRlG*^sB% zuVRKl6<#20bF<)6fX+)7vy}>t7 zH8`SdR!SJTJ+_?Q9E)L-cpg;J1-vy-4mS8j*<4DN2T}P+6%zMQUs(+UPr^^)D8a`P zCZX^7wH0m^{A+LU3{)|M94mxkY?5O%#t=c|%}7S&$M{ARxmE*9%pl#OfLjzmwJNKb z-29-dCh%NA5b8jf2M|TfrGg5g2F%0^M*@a=q@%3X8K#en<5+*pPDPs5@&cj)E&rq% z^SNLpw~7$(XCR?iz&LoYjIFGu_$Ozyf*-}l!d!azqsJN$&fl(B+W7dTm01lY1G@_> z!O|5nP5{XP5-x`fhA3-HF(Cp=@~22#Q=>NTaFGgfhE)zqR5 zC^(dp@F+n$AXNW(L=C4?rUg=pKs)qzT-ATP%7U~6a5=-0Tj0i~vKlQdmGVFxsS0J# zmk0XSfKSwLtkMcQ2H% zx>8!6X^SRYZybZ{M-r-hTcXWbfAuL5xhqi9ztuzxz^j_XtflQeu>#tdye+Ta?#V z>0Vj8lnPn?U7>Um`CT)HwEA^U4 z&ntEP+WXflL`*V!&|52Og#TPaGR*!HtM!SH_B~IxSj*N=mcKNLu6~`YXNCtmH3 zj(W9+nVdaT&@>lsVVDRq5&f368||U4?)T=D9eLk@t3{&!OO#v)-_E__SuMtr91~;e zH*!7XqnzJLAGJT!9B|;deQWo*yPWElrlWCq*e6*r;j+>ZAN7Wm-jQeg-kzf-UnnSv z-_;Iv3I57`MeI?yD9GJDvf=Ny=5#l)_lI?yF=sXLIXaPVE2nJHOpO`iS$fl3f+O4Q zS5Wu4@?FoJ-EFN43yQZjS5h;XHR9yiMW}tHXIT#M^dn`H8`qt*qdo3TQTXh3M6orr z7Bqa~{MEPv>o}cp@RJaEFD_>tIIOaIH$~6CA*FG~cO*rca!tr*A+@hyv-T@eJ!?l_ z`7%^jKFeUjD5V0eK6}}dpQ!WMqp&vD$NtyH!HXJU!bubJ6Sfb01*bJAld05HEV_s7qOt~v{g(jB<&X*)j~v;QftC4~M&uaxyeSoR=! zf__z}hy-^x{_Ujm^1B8e(HXl5uEz9+INfTh_wk>~P~n<)k@YUccC8W3fwUyDe%GQV zMNy3c4wjo|j~M!`+*=e{-1z1ur?u(Tei+NvMT@OHed}CTC^TGJIVM-T_6QwZ(oqzU zG^7%`LR-`RjZlH_2447eMjB_-Uzc*WD)ycqVHDr}M zF44D;B}%8KZF{GTc{FB9icVKv?cmjpjamP5m)ji{eX-4}MefrXqw`Jqt)lavEgpM~ zZ7|umM)=(b){d4deQh&q_hhNQ##51rmZU|gGoDM-zOu5a)t>6MU*UjlMO7(jfZWRo zm%b>UvaAwU3-_ujzLoKF3nR|z#wmtqxv&<*wer9V|5}$*sHMfYn6yxN>Q^$|V#Mpb zdw%6fQp^6Fj`*>N>s@}Xi8G(hjCg@T;pR%S_MEZXteKTV2(Q6}Eh#J3Ad_iqC zH~pXl zozL%HcwG2FaGcmSdZFCCuyVsGxKOl+Zv6J=*4_<8nT$_m=Pn95<^+~D!+QEU7acsc zr9UB}rDZ#kFt)cfy6iJNJ7b$)Ar+#fMa8_j_T$UtT(j?mZhwn`it6vkqEh}(0A?4N z=%sdL)c}_sKrOFor&0BOO{i0l=fkf{&TlI!D%yPO8+HPp%jw9^lR!sC6`)@k#hE7~?YG&ew2 zJb!S{;7OLIBmDr zd*|taVV1Ie5^7y)ksmPj6B+J!C+hU7NmxQ|NQ{Atj+{PsX zX)du2OJ%m=>nSFg)8L&RFO)Vim6Vl6Mpk=2hEZx^0)nw3S}{%HZ8si((dbL zX{nj>F9B-W7L66_=J!6yyb|Axg|>9D5=Od`iIEnaZSFhBnm5*7hyfjNF9RZlNqC9`DTn|Yzy^Mn8yegMt>q!Xinsz@sg|BI4ua#@svC!_%-(FIKiJC^QPB006*yuRe7qcxwv~{8pXTzUeA-BbG_|fJ(ARx`+Q87 zUuyAVzfVdkM+eqIsn~m)k$5sV{!DTa>%L{mZRVpH@!xv-BHk_yMEx36zM+`fBt`{ea+Z*OlNKYsk?kJopPU%mhR_xJDa9{=+4 z8MaWz?0j6w^wL z=wiCPj6yIFO^DgpK-;HZIfX7ICfE+SY0}0z-_QA))&~(T?=TJ1Xvrdy+nXtFz$Ip7t!*72cBDtsvBp2n7T(1gEu9w$@D$#^QnqVcG z;Gw4Oej?lgBsZ%JLb6I2FLk2--TnUS*UdT8-S4(`-F$Dm`@Pk!i$r|k8TbnNc25>M zz02y&$%4GU&B%(L?qi0d?oBa{z$cEQ(c3~CQJ*;uI

hOb9&rxNhLJChnSRJH z!>N4CIFfE#jw6X0gPDHNjv@)8hM9iUjv}FBn&8%jd>4=_yG)_L&#$$Pl# z*v;$q&6U;q@eA0uGeRbpz`7j=69I^f{x2XdQU;Og*glBssVhOMN|25csHy}iD^UPY zsoZ-F#eYNu03%);zlNC7z54}?*zPx63Q49RCy-nugCv!&VkFm-(uB%uLXw(bc}-A8 zQyG-P@}5MJJ)BH%90du$X-ZO%1`KoRER6^Sn~2RpLX!F-qBKfopg+oXQ#8NFeIEL& zB*k$xdus6AX<7}CAp{Ih01Xz)7}Fo3&31J}_zg>zi0f77NOpUcC4wq+1SvTJD|7@( zJJQXEwaj*30rs#FWu^i0u(=?X85YRHqO++*N^0TR)T%^kJA=@JJ~fF@q@ip4sX>Jr z4ei*Snq#QZK$Wf|MPB z6*(dwR(Le1@-q+xtRv3Lk4WUPj>S!Wf+CNF%IQK%x?nk7(%IM!W@N!~ z4h5#_jCFK46op7<>Z8K}oz(}Z=L0O~!#JEWLSpK#BfR+sr<5buhJF2W&S)Hs>-_w1 zU6(8tK{C1J=b%1#2Xz9{U}Mafu@0xk>>bP*ThfkcPTA7fd!^gJTfW?KN{ zWRD!SP6I^=WhQ=Q+*|Vt%C;&BSXR|;0Nd*DC$8k_XJ!Tr%cNqrcE@7(J*Yo`pBLTx z;lS%$JPlCiV%US73-@t!Xodo@gQ&LmNVU)7ztyx`TSB81240W?rv*OVw$#W7HHv{K zxbIt*b#0;KSpnH>~knP7Mx*o;#{-8E%i)+vCk2{{iZT#$vh1001A0 z2m}BC000301^_}s0sveN?c7_B+r||D;Ai!>j3(zg7ZQ3<{-1~ao3}CA06%|301L3%?lw?wZ#QWL96BYgw{JF5zj(VLiJ`vT zZ=int>Snv!VDsAT%MiGJ{ceNag0Ek{gALU0UZho}djIwPhAnBRzj?QR_4SL*n#sEQ z{VzZK^y&MbH?QB-+c)3d{P}+4G&GCb{YIr(+`hiwC_!4>Z}%HV(v05TZ$gxMd)pia z&BAuyE~G=!&%{9-Y8u%On{Jz(cl}Yj{-|AZ6s3jzFw*b*)#2yyfM%uNC9O1X*RSj+ zTe1^$3m<>X&XiV${XhOV{s?JhKiP6S9KkxgW4%^}FJ{xXU(s2Ko zU?k^5zden%+xPil`|+^b>`mI0ul{l~UU7BZ#&k2>Y@oZ2-~RI9)8U7WLVq>80U54o zH^AIq*?y1EU*B%PIoDm~ZlJ%``RaH==q~qU>ut7mH$ByEdaB*@RJ-Y^cJotpAN^uz zdQkUC&xiJfy=JFx@0P{T?7;TUnGfv?dyS`G-91#>7uAcm)!n=5?W>jj!d76Qg34BQ z1z$jg1}dz$0>g&286pD}RjvZ#jE6tH&%_B6aYD7L9#*^6!-~A;3(TR;905RbtvFwP z_wU2I`|m#dYyZoS8?c^Tz5C(&pMLuO!#}2@i6*bm()9AfUq2lVfBXK!KXxAvfB$=X z(`zpl`q3=*jaSif$JOic-4a~{mScT?`0j6SKmL3Aj;&{ogj_nRm*h-MMgu?(N@a`n<>9@)eEzTMCSKI!2@9O-iwFa|oTDIP)4atKZj zYdXRq+w|-0utq3z$Tq67Kde#9FtX*Sgoib_0+CY~Er@qCVkRgXur;rj(C6%S5 zekmy~CB>zre37JFpp5f#V@c*vQh)5t!(AQI0Fs1 zwnNsoe;Q-$psZ~UoJotInY0L6?W*l+x7w~aL~nh#R9aRF>C1n)wB(btEZ&u+T~Dbc z$*7f%TIZM6Ht&~0#b^-=-jXlJkA$TFvi%CBZ+atN$E^=Nw|)#D5t7Zlw-8+*&A}Cl zkzEchNkq=UMcp_POOlH#VInR~#GOQ%llwXo7i0`J zElLo7Sd)~+O_#)z9@glNi5oPyBr283CCeh3&LCxS$vOd`3(4g&paD`n<=mcER^k)e!6M0>L%PvTJk{P%t$Z;yJ;7nY3sR1g<_ z7vhpYCN2q7+eGd*nd^clE1aAA^398zS20>B=G4ee(#3qUaZ`2IU{#muCUG%<^GAbQ z=cE(YyAfP$a&d~*X#rezSiiZs+djPf=H}an`|Zm&-|k;N+%yQwQef852#l7127$@y z81TM$+XIZ~`Y&0E*IZ9<1lJRcC_~7L&ciZ`w}8G0mu~y)dLUPYBKYIEJPpZwpTBL= ziOyy7=yGT$={&olv{^JePv|9gb{g2aoY^6xXE>MAr;h|Y1kGgy8)I}R1F(1`v9~dL zae1hFHpM3^K*WbN#yP8qfv2!Wn!y^~3|7Z8Si3{U?=%vh$@DE?(5R;q{uF2{cK6PO ziGNGX(ac|@enjez1wRY9Ke>wlEkvxcNtZF1hpGTU z(TBJ!dmgWL4zcJG$a1w4b04p*cH&VS;V6#fY9~Ch+KEpsa^k7dX6$0i(K8R{#ABF? zC$fko6Y~_^^A$N~q2g1EoVb+kw_W7?av_~9T4!h45yWnM;hj#r^Lg!)e5!rkt#+%s zRdB&Zx!5}l=$q+}0(YLV#9d$gB#beinaQf0fT&HOlY$NLc=k=nt)*%y^h~OWMba_* z)?0%yU~=bX&306seOp?^OMh?vgq8TcV-RNLqwz}PKf|?WViT z&bnKkbtbMUm^-~GmwPVWWEN{?H{+j6gJWK8O8%OXz@}g@nB#G$GceDn(~BKa>jvs6 z)fCf|)SdWzJJ;iP8t5u8>-%8l!X+#{$I>V_$)!oMNx#)HWgC0$yCpiQtaamMg5kIJfWe8nTXs~L1}YGXozVRWD%5#g@C~y z*5C_4DW45W!}VFXgR6ql^p8fCa5gwa4W8>OVpFP5#imq$*3k6P$vD-Qgr;;9#T04k z5Q3tYNyyqI={y9cDPl!eNg`B&QE|C=>T>baRd-cN#dj<6QPNb+f;@}?6|bu@hS-wl zJRPqxrkuQ=3(v#T7^n+M`FQ71slu3wn_WaoK4%c!RSX`Ph`Bx$_YgR6eN8HUA%DkH z@!<^F1orQ-#t1e#OuYh=QX|+HB1*B~cs+Hr2oNLq7RspmPKQjgB5hlNxeg**8Q6VU2=_rfInWBkLtKivDnz5iSHBIfQztopWd_R$7k z$ZwemK+Eo>wwQu@;JXAD`UJP=Ug+J+*-P)2*kiEbN)<@-P1>s#7n3jDPt?liAdd2- zo};pSSq(CuQ-)DKmZ!*<%A8KFk?+ay4etHk*^5yiF4&7PaM3*q^|ces^x5ZIP$RK> zKKMR!PD{Twe6NaA$zye|@K8+#oh0PMILK34kvydpal`yFGpC18mdZKu(HB~~*J+N| z6(^FZHI75XqpgOi_qii3X2waR-nby8+Ft79Yg!Afl#m18hj&bI65dG>IT;&gAokyh zO=zbh_xZ$ggPi7#GmPSb7OQ+|UfRd{X#Y%i+vnP0eY$~$j&gTNd2ce;ob`1-0z4Wk ze8GwnPLI9#U$>pcSw-=DSd%Uadye8vKZKcnGM{b6xqi58C|tH`lNV2Vw!nuDL%MvI zx7oChPaS0t>nr!)4yKlxwhMdusG+SW6#+>rpC&Xsnc(7tz%U^& z4D%SNjmFb8Erl^rS9t1WsUkMbFU@n$K;cw3VJLSKc9YNE`clTLzEzQ|E>uK2l=_0p znh7-tdd|cUMib?%i5CWb;8M~1#7sva=Y4X@J5cz0j{GiZCR}PakM#)7YA5i#u^yyQ zU#Wh+=9|y<*YUD! z){hTQXB{;sI%yzyY0~78If@a4i%m6fq^AaZ@APC(4P4Vx!(&ekpX{lDGd(pE1!?T5 zNsAZs)a0A;CYW8I(Sa_HZU3#AHY$pv|&`2|TrjIUeq=73MX>iFbUED|mpRbWdn+_Nq zyJSN1V$;I-P~YUgt1!-3O*3-w zV&3#Pc6)n-`gEra1)_4N4KYUy^%b2qQePB*a(yiR>`&0;V!xUAGhyj`_t@Vx1<2Gl zyQn`pb)}G07SFfLJ(YO^E{pP`zb+(AcK`$@bT&3+l<2U2w%!`49~qibeMx94_5Ua| zjRvY%kx!E@-Z1c)o*UwXB3GPBku1g&{OP}9uv(H?OaK5MiwFb&00000{{{d;LjnLB O00RI3000000000M9NZHC literal 0 HcmV?d00001 diff --git a/vcfpp.h b/vcfpp.h index fd9acf5..8d12822 100644 --- a/vcfpp.h +++ b/vcfpp.h @@ -2,7 +2,7 @@ * @file https://github.com/Zilong-Li/vcfpp/vcfpp.h * @author Zilong Li * @email zilong.dk@gmail.com - * @version v0.5.2 + * @version v0.6.0 * @breif a single C++ file for manipulating VCF * Copyright (C) 2022-2023.The use of this code is governed by the LICENSE file. ******************************************************************************/ @@ -49,6 +49,7 @@ // make sure you have htslib installed extern "C" { +# include # include # include # include @@ -94,6 +95,9 @@ using isFormatVector = typename std::enable_if>::value, bool>::type; +namespace details +{ + template isScalar isMissing(T const & v) { @@ -151,7 +155,7 @@ inline std::vector split_string(const std::string & s, const std::s } // deleter for htsFile -struct htsFile_close +struct hts_file_close { void operator()(htsFile * x) { @@ -159,8 +163,35 @@ struct htsFile_close } }; +// deleter for hts iterator +struct hts_iter_close +{ + void operator()(hts_itr_t * x) + { + if(x) hts_itr_destroy(x); + } +}; + +// deleter for hts idx +struct hts_idx_close +{ + void operator()(hts_idx_t * x) + { + if(x) hts_idx_destroy(x); + } +}; + +// deleter for tabix idx +struct tabix_idx_close +{ + void operator()(tbx_t * x) + { + if(x) tbx_destroy(x); + } +}; + // deleter for variant -struct variant_close +struct bcf_line_close { void operator()(bcf1_t * x) { @@ -177,6 +208,8 @@ struct bcf_hdr_close } }; +} // namespace details + /** * @class BcfHeader * @brief Object represents header of the VCF, offering methods to access and modify the tags @@ -380,7 +413,7 @@ class BcfHeader * */ void updateSamples(const std::string & samples) { - auto ss = split_string(samples, ","); + auto ss = details::split_string(samples, ","); const int nsamples = nSamples(); if(nsamples != (int)ss.size()) throw std::runtime_error("You provide either too few or too many samples"); @@ -466,7 +499,7 @@ class BcfRecord private: BcfHeader * header; - std::shared_ptr line = std::shared_ptr(bcf_init(), variant_close()); // variant + std::shared_ptr line = std::shared_ptr(bcf_init(), details::bcf_line_close()); // variant bcf_hdr_t * hdr_d = NULL; // a dup header by bcf_hdr_dup(header->hdr) bcf_fmt_t * fmt = NULL; bcf_info_t * info = NULL; @@ -1517,9 +1550,9 @@ class BcfReader { private: std::shared_ptr fp; // hts file - hts_idx_t * hidx = NULL; // hts index file - tbx_t * tidx = NULL; // .tbi .csi index file for vcf files - hts_itr_t * itr = NULL; // tabix iterator + std::shared_ptr hidx; // hts index file + std::shared_ptr tidx; // .tbi .csi index file for vcf files + std::shared_ptr itr; // hts iterator kstring_t s = {0, 0, NULL}; // kstring std::string fname; bool isBcf; // if the input file is bcf or vcf; @@ -1529,7 +1562,7 @@ class BcfReader BcfHeader header; /// number of samples in the VCF int nsamples; - /// number of samples in the VCF + /// a vector of samples name in the VCF std::vector SamplesName; /// Construct an empty BcfReader @@ -1575,24 +1608,25 @@ class BcfReader ~BcfReader() { - close(); + if(s.s) free(s.s); } - /// close the BcfReader object. + /// Close the VCF file and its associated files void close() { if(fp) fp.reset(); - if(itr) hts_itr_destroy(itr); - if(hidx) hts_idx_destroy(hidx); - if(tidx) tbx_destroy(tidx); - if(s.s) free(s.s); + if(hidx) hidx.reset(); + if(itr) itr.reset(); + if(tidx) tidx.reset(); } - /// open a VCF/BCF/STDIN file for streaming in + /// Open a VCF/BCF/STDIN file for streaming in void open(const std::string & file) { + if(!fname.empty() && fname != file) + throw std::runtime_error("does not support re-open a file yet. " + fname); fname = file; - fp = std::shared_ptr(hts_open(file.c_str(), "r"), htsFile_close()); + fp = std::shared_ptr(hts_open(fname.c_str(), "r"), details::hts_file_close()); if(!fp) throw std::invalid_argument("I/O error: input file is invalid"); header.hdr = bcf_hdr_read(fp.get()); nsamples = bcf_hdr_nsamples(header.hdr); @@ -1611,12 +1645,44 @@ class BcfReader return header; } - /** @brief get the number of records of given region */ - uint64_t getVariantsCount(BcfRecord & r, const std::string & region) + /** + * @brief query the status of a given region in the VCF + * @return -2: the region is not a valid bcftools-like format, + or it is not presenting in the VCF even though it's bcftols-like format. + -1: there is no index file found. + 0: the region is valid but empty. + 1: vaild and not empty. + */ + int getStatus(const std::string & region) + { + try + { + setRegion(region); + BcfRecord v(header); + if(!getNextVariant(v)) return 0; + } + catch(const std::invalid_argument & e) + { + return -1; + } + catch(const std::runtime_error & e) + { + return -2; + } + return 1; + } + + /** + * @brief count the number of variants by iterating through a given region. + * @note If you want to continue work on that region, remember to reset the region by setRegion! + Also, check the status of the region first to handle the different cases + */ + int getVariantsCount(const std::string & region) { - uint64_t c{0}; - while(getNextVariant(r)) c++; - setRegion(region); // reset the region + int c{0}; + setRegion(region); + BcfRecord v(header); + while(getNextVariant(v)) c++; return c; } @@ -1633,54 +1699,61 @@ class BcfReader } /** - * @brief explicitly stream to specific region - * @param region the string is samtools-like format which is chr:start-end + * @brief explicitly stream to specific region. throw invalid_argument error if index file not found. + * throw runtime_error if the region was not a valid bcftools-like format or was not presenting in the + * VCF. + * @param region the string for region is samtools-like format, which can be 'chr', 'chr:start' and + * 'chr:start-end' * */ void setRegion(const std::string & region) { // 1. check and load index first // 2. query iterval region // 3. if region is empty, use "." - if(isEndWith(fname, "bcf") || isEndWith(fname, "bcf.gz")) + if(details::isEndWith(fname, "bcf") || details::isEndWith(fname, "bcf.gz")) { isBcf = true; - hidx = bcf_index_load(fname.c_str()); - if(itr) bcf_itr_destroy(itr); // reset current region. + hidx = std::shared_ptr(bcf_index_load(fname.c_str()), details::hts_idx_close()); + if(itr) itr.reset(); // reset current region. if(region.empty()) - itr = bcf_itr_querys(hidx, header.hdr, "."); + itr = std::shared_ptr(bcf_itr_querys(hidx.get(), header.hdr, "."), + details::hts_iter_close()); else - itr = bcf_itr_querys(hidx, header.hdr, region.c_str()); + itr = std::shared_ptr(bcf_itr_querys(hidx.get(), header.hdr, region.c_str()), + details::hts_iter_close()); } else { isBcf = false; - tidx = tbx_index_load(fname.c_str()); - if(tidx == NULL) throw std::runtime_error("error in loading tabix index!"); - if(itr) tbx_itr_destroy(itr); // reset current region. + tidx = std::shared_ptr(tbx_index_load(fname.c_str()), details::tabix_idx_close()); + if(tidx.get() == NULL) throw std::invalid_argument(" no tabix index found!"); + if(itr) itr.reset(); // reset if(region.empty()) - itr = tbx_itr_querys(tidx, "."); + itr = std::shared_ptr(tbx_itr_querys(tidx.get(), "."), details::hts_iter_close()); else - itr = tbx_itr_querys(tidx, region.c_str()); - if(itr == NULL) throw std::runtime_error("no interval region found!"); + itr = std::shared_ptr(tbx_itr_querys(tidx.get(), region.c_str()), + details::hts_iter_close()); } + if(itr.get() == NULL) + throw std::runtime_error("region was not found! make sure the region format is correct"); } /** @brief read in the next variant - * @param r r is a BcfRecord object to be filled in. */ + * @param r the BcfRecord object to be filled in. */ bool getNextVariant(BcfRecord & r) { int ret = -1; - if(itr != NULL) + if(itr.get() != NULL) { if(isBcf) { - ret = bcf_itr_next(fp.get(), itr, r.line.get()); + ret = bcf_itr_next(fp.get(), itr.get(), r.line.get()); bcf_unpack(r.line.get(), BCF_UN_ALL); return (ret >= 0); } else { - int slen = tbx_itr_next(fp.get(), tidx, itr, &s); + int slen = tbx_itr_next(fp.get(), tidx.get(), itr.get(), &s); if(slen > 0) { ret = vcf_parse1(&s, r.header->hdr, r.line.get()); // ret > 0, error @@ -1707,7 +1780,7 @@ class BcfWriter { private: std::shared_ptr fp; // hts file - std::shared_ptr b = std::shared_ptr(bcf_init(), variant_close()); // variant + std::shared_ptr b = std::shared_ptr(bcf_init(), details::bcf_line_close()); // variant int ret; bool isHeaderWritten = false; const BcfHeader * hp; @@ -1783,8 +1856,8 @@ class BcfWriter */ void open(const std::string & fname) { - auto mode = getMode(fname, "w"); - fp = std::shared_ptr(hts_open(fname.c_str(), mode.c_str()), htsFile_close()); + auto mode = details::getMode(fname, "w"); + fp = std::shared_ptr(hts_open(fname.c_str(), mode.c_str()), details::hts_file_close()); if(!fp) throw std::invalid_argument("I/O error: input file is invalid"); } @@ -1799,7 +1872,7 @@ class BcfWriter */ void open(const std::string & fname, const std::string & mode) { - fp = std::shared_ptr(hts_open(fname.c_str(), mode.c_str()), htsFile_close()); + fp = std::shared_ptr(hts_open(fname.c_str(), mode.c_str()), details::hts_file_close()); if(!fp) throw std::invalid_argument("I/O error: input file is invalid"); } @@ -1872,7 +1945,7 @@ class BcfWriter } /// streams out the given variant of BcfRecord type - inline bool writeRecord(BcfRecord & v) + bool writeRecord(BcfRecord & v) { if(!isHeaderWritten) writeHeader(); if(bcf_write(fp.get(), v.header->hdr, v.line.get()) < 0) return false;