diff --git a/Rcpp/vcfpp-read.cpp b/Rcpp/vcfpp-read.cpp index dbd29af..bfb4c74 100644 --- a/Rcpp/vcfpp-read.cpp +++ b/Rcpp/vcfpp-read.cpp @@ -42,8 +42,8 @@ List tableGL(std::string vcffile, std::string region, std::string samples = "-") CharacterVector chr(nsnps), ref(nsnps), alt(nsnps), id(nsnps), filter(nsnps), info(nsnps); IntegerVector pos(nsnps); NumericVector qual(nsnps); - vector> GL(nsnps); - vector gl; + vector> GL(nsnps); // R only have double + vector gl; for(int i = 0; i < nsnps; i++) { vcf.getNextVariant(var); diff --git a/readme.org b/readme.org index 32792fc..ca14089 100644 --- a/readme.org +++ b/readme.org @@ -21,7 +21,7 @@ Features: - [[#installation][Installation]] - [[#usage][Usage]] - [[#reading-vcf][Reading VCF]] - - [[#genotype-values][Genotype values]] + - [[#genotypes][Genotypes]] - [[#writing-vcf][Writing VCF]] - [[#variants-operation][Variants Operation]] - [[#header-operation][Header Operation]] @@ -69,7 +69,12 @@ int main(int argc, char* argv[]) } #+end_src -** Genotype values +** Genotypes +There are 3 types used for genotypes, ie vector, vector +and vector. One can use vector and vector for +memory-effcient goal. The downside is that it only stores 0 and 1. And +vector can store missing values and multialleles. + *** Code genotypes with missing allele as heterozygous. If you use =vector= and =vector= to store the genotypes, then diff --git a/test/bcf-reader.cpp b/test/bcf-reader.cpp index 2753f3e..0b06636 100644 --- a/test/bcf-reader.cpp +++ b/test/bcf-reader.cpp @@ -63,3 +63,22 @@ TEST_CASE("parse vcf with missing genotypes diploid - vector", "[bcf-reader } REQUIRE(n == 1); } + +TEST_CASE("parse vcf with multialleles - vector", "[bcf-reader]") +{ + BcfWriter bw("test-multialleles.vcf", "VCF4.3"); + bw.header.addContig("chr20"); + bw.header.addFORMAT("GT", "2", "String", "Genotype"); + bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); + for(auto & s : {"id01", "id02", "id03"}) bw.header.addSample(s); // add 3 samples + bw.writeLine("chr20\t2006060\trs146931526\tG\tA,T\t100\tPASS\tAF=0.02\tGT\t1|2\t1|1\t0|2"); + bw.close(); + BcfReader br("test-multialleles.vcf"); + BcfRecord var(br.header); + vector gt; + while(br.getNextVariant(var)) + { + var.getGenotypes(gt); + for(auto g : gt) cout << g << endl; + } +} diff --git a/vcfpp.h b/vcfpp.h index 6b34d30..11ee268 100644 --- a/vcfpp.h +++ b/vcfpp.h @@ -163,7 +163,7 @@ class BcfHeader public: BcfHeader() {} - virtual ~BcfHeader() {} + ~BcfHeader() {} /** @brief print out the header */ friend std::ostream & operator<<(std::ostream & out, const BcfHeader & h) @@ -366,7 +366,7 @@ class BcfRecord gtPhase.resize(nsamples, 0); } - virtual ~BcfRecord() {} + ~BcfRecord() {} /** @brief stream out the variant */ friend std::ostream & operator<<(std::ostream & out, const BcfRecord & v) @@ -1238,7 +1238,7 @@ class BcfReader if(itr) hts_itr_destroy(itr); } - virtual ~BcfReader() + ~BcfReader() { if(fp) hts_close(fp); if(itr) hts_itr_destroy(itr); @@ -1346,6 +1346,7 @@ class BcfWriter bcf1_t * b = bcf_init(); kstring_t s = {0, 0, NULL}; // kstring bool isHeaderWritten = false; + bool isClosed = false; public: /// header object initialized by initalHeader @@ -1408,9 +1409,9 @@ class BcfWriter initalHeader(h); } - virtual ~BcfWriter() + ~BcfWriter() { - close(); + if(!isClosed) close(); } /** @@ -1444,8 +1445,9 @@ class BcfWriter void close() { if(!isHeaderWritten) writeHeader(); - if(fp) hts_close(fp); if(b) bcf_destroy(b); + if(fp) hts_close(fp); // be careful of double free + isClosed = true; } /// initial a VCF header using the internal template given a specific version. VCF4.1 is the default @@ -1465,7 +1467,7 @@ class BcfWriter /// write a string to a vcf line void writeLine(const std::string & vcfline) { - if(!isHeaderWritten) writeHeader(); + if(!isHeaderWritten && !writeHeader()) throw std::runtime_error("could not write header out\n"); std::vector line(vcfline.begin(), vcfline.end()); line.push_back('\0'); // don't forget string has no \0; s.s = &line[0];