diff --git a/test/Makefile b/test/Makefile index c09983b..400191d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -3,9 +3,9 @@ HTSLIB = /usr/local/lib # detect OS architecture and add flags CXX = g++ CXXFLAGS = -std=c++11 -Wall -fsanitize=address -# CXXFLAGS = -std=c++11 -Wall +# CXXFLAGS = -std=c++11 -Wall -g LDFLAGS = -L$(HTSLIB) -Wl,-rpath,$(HTSLIB) -LIBS = -lhts -lz -lm -lbz2 -llzma -lcurl -lpthread +LIBS = -lhts INC = -I. -I.. -I${HTSINC} OBJS = test-main.o bcf-variant.o bcf-reader.o bcf-writer.o example.o diff --git a/test/bcf-reader.cpp b/test/bcf-reader.cpp index e43c581..73c3fc3 100644 --- a/test/bcf-reader.cpp +++ b/test/bcf-reader.cpp @@ -64,26 +64,6 @@ TEST_CASE("parse vcf with missing genotypes diploid - vector", "[bcf-reader REQUIRE(n == 1); } -TEST_CASE("parse vcf with multialleles - vector", "[bcf-reader]") -{ - string vcffile{"test-multialleles.vcf.gz"}; - BcfWriter bw(vcffile, "VCF4.2"); - bw.header.addContig("chr20"); - bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); - bw.header.addFORMAT("GT", "1", "String", "Genotype"); - for(auto & s : {"id01", "id02", "id03"}) bw.header.addSample(s); // add 3 samples - bw.writeLine("chr20\t2006060\trs146931526\tG\tA,T\t100\tPASS\tAF=0.02\tGT\t1|2\t1|1\t0|2"); - bw.close(); - BcfReader br("test-multialleles.vcf.gz"); - BcfRecord var(br.header); - vector gt; - while(br.getNextVariant(var)) - { - var.getGenotypes(gt); - for(auto g : gt) cout << g << endl; - } -} - TEST_CASE("parse PL in vcf - vector", "[bcf-reader]") { string vcffile{"test-GL.vcf.gz"}; @@ -98,11 +78,9 @@ TEST_CASE("parse PL in vcf - vector", "[bcf-reader]") BcfReader br(vcffile); BcfRecord var(br.header); vector pl; - while(br.getNextVariant(var)) - { - var.getFORMAT("PL",pl); - for(auto g : pl) cout << g << endl; - } + REQUIRE(br.getNextVariant(var)==true); + var.getFORMAT("PL",pl); + for(auto g : pl) cout << g << endl; } TEST_CASE("parse GL in vcf - vector", "[bcf-reader]") @@ -119,9 +97,28 @@ TEST_CASE("parse GL in vcf - vector", "[bcf-reader]") BcfReader br(vcffile); BcfRecord var(br.header); vector gl; - while(br.getNextVariant(var)) - { - var.getFORMAT("GL",gl); - for(auto g : gl) cout << g << endl; - } + REQUIRE(br.getNextVariant(var)==true); + var.getFORMAT("GL",gl); + for(auto g : gl) cout << g << endl; +} + +TEST_CASE("parse vcf with multialleles - vector", "[bcf-reader]") +{ + string vcffile{"test-multialleles.vcf.gz"}; + BcfWriter bw(vcffile, "VCF4.2"); + bw.header.addContig("chr20"); + bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); + bw.header.addFORMAT("GT", "1", "String", "Genotype"); + for(auto & s : {"id01", "id02", "id03"}) bw.header.addSample(s); // add 3 samples + string l1 = "chr20\t2006060\trs146931526\tG\tA,T\t100\tPASS\tAF=0.02\tGT\t1|2\t1|1\t0|2"; + bw.writeLine(l1); + bw.close(); + BcfReader br("test-multialleles.vcf.gz"); + BcfRecord var(br.header); + vector gt; + REQUIRE(br.getNextVariant(var)==true); + auto l2 = var.asString(); + REQUIRE(l2 == l1 + "\n"); + var.getGenotypes(gt); + for(auto g : gt) cout << g << endl; } diff --git a/test/bcf-variant.cpp b/test/bcf-variant.cpp index b25b1bc..165fc44 100644 --- a/test/bcf-variant.cpp +++ b/test/bcf-variant.cpp @@ -10,13 +10,12 @@ TEST_CASE("set genotypes without missing ", "[bcf-variant]") BcfRecord v(br.header); vector gt; vector gi{1, 1}; - if(br.getNextVariant(v)) + while(br.getNextVariant(v)) { v.getGenotypes(gt); - for(auto g : gt) cerr << g << endl; v.setGenotypes(gi); v.getGenotypes(gt); - for(auto g : gt) cerr << g << endl; + for(size_t i = 0; i < gt.size(); i++) REQUIRE(gt[i] == gi[i]); } } @@ -25,18 +24,17 @@ TEST_CASE("set genotypes with missing ", "[bcf-variant]") BcfReader br("test-vcf-read.vcf.gz"); BcfRecord v(br.header); vector gt; - vector gi{1, -9}; - vector g2{-2147483648, -9}; - br.getNextVariant(v); - if(br.getNextVariant(v)) + vector gi{1, -9}; // -9 as missing + vector g2{-2147483648, -9}; // also, bcf_int32_missing (R_NA) as missing + // br.getNextVariant(v); + while(br.getNextVariant(v)) { v.getGenotypes(gt); - for(auto g : gt) cerr << g << endl; v.setGenotypes(gi); v.getGenotypes(gt); - for(auto g : gt) cerr << g << endl; + for(size_t i = 0; i < gt.size(); i++) REQUIRE(gt[i] == gi[i]); v.setGenotypes(g2); v.getGenotypes(gt); - for(auto g : gt) cerr << g << endl; + for(size_t i = 0; i < gt.size(); i++) REQUIRE(gt[i] == -9); } } diff --git a/vcfpp.h b/vcfpp.h index ff03ca6..2c721a5 100644 --- a/vcfpp.h +++ b/vcfpp.h @@ -157,7 +157,6 @@ struct variant_close } }; - // deleter for bcf header struct bcf_hdr_close { @@ -166,7 +165,7 @@ struct bcf_hdr_close if(x) bcf_hdr_destroy(x); } }; - + /** * @class BcfHeader * @brief Object represents header of the VCF, offering methods to access and modify the tags @@ -394,9 +393,9 @@ class BcfRecord friend class BcfWriter; private: - BcfHeader* header; + BcfHeader * header; std::shared_ptr line = std::shared_ptr(bcf_init(), variant_close()); // variant - bcf_hdr_t * hdr_d; // a dup header by bcf_hdr_dup(header->hdr) + bcf_hdr_t * hdr_d = NULL; // a dup header by bcf_hdr_dup(header->hdr) bcf_fmt_t * fmt = NULL; bcf_info_t * info = NULL; int32_t * gts = NULL; @@ -420,7 +419,11 @@ class BcfRecord init(h); } - ~BcfRecord() {} + ~BcfRecord() + { + if(gts) free(gts); + if(hdr_d) bcf_hdr_destroy(hdr_d); + } /// initilize a BcfRecord object by pointing to another BcfHeader object void init(BcfHeader & h) @@ -601,6 +604,7 @@ class BcfRecord } else { + free(dst); throw std::runtime_error("failed to parse the " + tag + " format of this variant.\n"); } } @@ -634,6 +638,7 @@ class BcfRecord } else { + free(dst); throw std::runtime_error("couldn't parse the " + tag + " format of this variant.\n"); } } @@ -661,10 +666,15 @@ class BcfRecord ret = bcf_get_info_float(header->hdr, line.get(), tag.c_str(), &dst, &ndst); } if(ret >= 0) + { v = std::vector(dst, dst + ret); // user have to check if there is missing in the return v; + free(dst); + } else + { + free(dst); throw std::runtime_error("couldn't parse the " + tag + " format of this variant.\n"); - free(dst); + } return true; } @@ -798,22 +808,26 @@ class BcfRecord // bcf_gt_type int i, j, k; nploidy = v.size() / nsamples; - gts = (int32_t *)malloc(v.size() * sizeof(int32_t)); + int32_t * gt = (int32_t *)malloc(v.size() * sizeof(int32_t)); for(i = 0; i < nsamples; i++) { for(j = 0; j < nploidy; j++) { k = i * nploidy + j; if(v[k] == -9 || v[k] == bcf_int32_missing) - gts[k] = bcf_gt_missing; + gt[k] = bcf_gt_missing; else if(gtPhase[i]) - gts[k] = bcf_gt_phased(v[k]); + gt[k] = bcf_gt_phased(v[k]); else - gts[k] = bcf_gt_unphased(v[k]); + gt[k] = bcf_gt_unphased(v[k]); } } - if(bcf_update_genotypes(header->hdr, line.get(), gts, v.size()) < 0) + if(bcf_update_genotypes(header->hdr, line.get(), gt, v.size()) < 0) + { + free(gt); throw std::runtime_error("couldn't set genotypes correctly.\n"); + } + free(gt); return true; } @@ -1276,7 +1290,7 @@ class BcfReader std::shared_ptr fp; // hts file hts_idx_t * hidx = NULL; // hts index file tbx_t * tidx = NULL; // .tbi .csi index file for vcf files - hts_itr_t * itr = NULL; // hts records iterator + hts_itr_t * itr = NULL; // tabix iterator kstring_t s = {0, 0, NULL}; // kstring std::string fname; bool isBcf; // if the input file is bcf or vcf; @@ -1330,10 +1344,19 @@ class BcfReader if(!samples.empty()) setSamples(samples); } - /// return a BcfHeader object - const BcfHeader & getHeader() const + ~BcfReader() { - return header; + close(); + } + + /// close the BcfReader object. + void close() + { + if(fp) fp.reset(); + if(itr) hts_itr_destroy(itr); + if(hidx) hts_idx_destroy(hidx); + if(tidx) tbx_destroy(tidx); + if(s.s) free(s.s); } /// open a VCF/BCF/STDIN file for streaming in @@ -1355,22 +1378,18 @@ class BcfReader return false; } - /// close the BcfReader object. - void close() - { - free(s.s); - if(fp) fp.reset(); - if(itr) hts_itr_destroy(itr); - } - - ~BcfReader() {} - /** @brief set the number of threads to use */ inline int setThreads(int n) { return hts_set_threads(fp.get(), n); } + /// return a BcfHeader object + const BcfHeader & getHeader() const + { + return header; + } + /** @brief get the number of records of given region */ uint64_t getVariantsCount(BcfRecord & r, const std::string & region) {