diff --git a/news.org b/news.org index 3159907..31dc2ed 100644 --- a/news.org +++ b/news.org @@ -1,31 +1,47 @@ #+title: News and Changes - +* v0.5.1 +- add =BcfHeader::updateSamples= + +* v0.5.0 +- major release with breaking APIs: =setCHR=, =setID=, =setRefAlt= +- new tool: =vcf_nipt=, which splits the output of QUILT2-nipt into two VCFs for mother and fetus respectively. + * v0.3.4 - rename BcfRecord::INFO as BcfRecord::allINFO + * v0.2.0 - major release + * v0.1.8 - fix BcfWriter + * v0.1.7 - add split_string - add INFO() + * v0.1.6 - BcfReader API break change - add setRegionIndex + * v0.1.5 - support multiplodiy - handl genotype missingness. =-9= represents missing value. - update API =setGenotypes= + * v0.1.4 - add updateGenotypes, setAlleleStr - update clang format - bug fix + * v0.1.3 - add some tools - bug fix + * v0.1.2 - fix writer + * v0.1.1 - fix inconsistency style + * v0.1.0 - first release diff --git a/test/bcf-writer.cpp b/test/bcf-writer.cpp index 6293911..8b884b3 100644 --- a/test/bcf-writer.cpp +++ b/test/bcf-writer.cpp @@ -55,13 +55,13 @@ TEST_CASE("init header twice", "[bcf-writer]") TEST_CASE("Write VCF by copying header of another VCF and modifying it", "[bcf-writer]") { BcfReader br("test-vcf-read.bcf"); - BcfRecord v(br.header); - br.getNextVariant(v); BcfWriter bw; bw.open("test-vcf-write.vcf"); bw.copyHeader("test-vcf-read.bcf"); bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)"); - v.resetHeader(bw.header); + bw.header.updateSamples("X001"); + BcfRecord v(bw.header); + br.getNextVariant(v); v.setINFO("AF", 0.2); bw.writeRecord(v); bw.close(); diff --git a/vcfpp.h b/vcfpp.h index bab36e2..1109e55 100644 --- a/vcfpp.h +++ b/vcfpp.h @@ -35,6 +35,8 @@ #pragma once +#include +#include #ifndef VCFPP_H_ # define VCFPP_H_ @@ -376,26 +378,50 @@ class BcfHeader * @param samples a comma-separated string for multiple new samples * @note this only update the samples name in a given sequential order * */ - inline void updateSamples(const std::string & samples) const + void updateSamples(const std::string & samples) { - int sid = 0; // index of the sample to modify - for(auto s : split_string(samples, ",")) + auto ss = split_string(samples, ","); + const int nsamples = nSamples(); + if((size_t)nsamples != ss.size()) throw std::runtime_error("You provide either too few or too many samples"); + kstring_t htxt = {0, 0, 0}; + bcf_hdr_format(hdr, 1, &htxt); + // Find the beginning of the #CHROM line + int i = htxt.l - 2, ncols = 0; + while(i >= 0 && htxt.s[i] != '\n') { - if(sid < nSamples()) - { - hdr->samples[sid] = strdup(s.c_str()); - sid++; - } - else - { -# if defined(VERBOSE) - std::cerr << "you provided too many samples"; -# endif - break; - } + if(htxt.s[i] == '\t') ncols++; + i--; + } + if(i < 0 || strncmp(htxt.s + i + 1, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", 45)) + { + if(i > 0 && !strncmp(htxt.s + i + 1, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", 38)) + throw std::runtime_error("Error: missing FORMAT fields, cowardly refusing to add samples\n"); + throw std::runtime_error("Could not parse the header: " + std::string(htxt.s)); } - int ret = bcf_hdr_sync(hdr); - if(ret != 0) throw std::runtime_error("could not modify " + samples + " in the header\n"); + + ncols = 0; + while(ncols != 9) + { + i++; + if(htxt.s[i] == '\t') ncols++; + } + htxt.l = i; + + // Replace all samples + for(i = 0; i < nsamples; i++) + { + kputc('\t', &htxt); + kputs(ss[i].c_str(), &htxt); + } + kputc('\n', &htxt); + + // destroy the old and make new header + bcf_hdr_destroy(hdr); + hdr = bcf_hdr_init("r"); + if(bcf_hdr_parse(hdr, htxt.s) < 0) + throw std::runtime_error("An error occurred while parsing the header\n"); + // free everything + free(htxt.s); } /**