Skip to content

Commit

Permalink
dosage support gt and fix getFORMAT
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Nov 24, 2022
1 parent 4f0c368 commit a1c6276
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
22 changes: 16 additions & 6 deletions tools/dosage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ int main(int argc, char* argv[])
{
std::cout << "Author: Zilong-Li ([email protected])\n"
<< "Description:\n"
<< " create DS tag for diploid samples given GP tag from input vcf file\n\n"
<< " create DS tag for diploid samples given GP or GT tag from input vcf file\n\n"
<< "Usage example:\n"
<< " " + (std::string)argv[0] + " -i in.bcf \n"
<< " " + (std::string)argv[0] + " -i in.bcf -o out.bcf -s ^S1,S2 -r chr1:1-1000 \n"
Expand All @@ -25,7 +25,7 @@ int main(int argc, char* argv[])
<< std::endl;
return 1;
}
std::string invcf, outvcf="-", samples = "-", region = "";
std::string invcf, outvcf = "-", samples = "-", region = "";
for (int i = 0; i < args.size(); i++)
{
if (args[i] == "-i")
Expand All @@ -43,13 +43,23 @@ int main(int argc, char* argv[])
BcfWriter bw(outvcf, vcf.header);
bw.header.addFORMAT("DS", "1", "Float", "Diploid Genotype Dosage"); // add DS tag into the header
int nsamples = vcf.nsamples;
std::vector<float> gp, ds(nsamples);
vector<float> gps, ds(nsamples);
vector<char> gts;
while (vcf.getNextVariant(var))
{
var.getFORMAT("GP", gp); // get GP values
for (int i = 0; i < nsamples; i++)
try
{
ds[i] = gp[i * 3 + 1] + gp[i * 3 + 2] * 2;
var.getFORMAT("GP", gps); // try get GP values
for (int i = 0; i < nsamples; i++)
{
ds[i] = gps[i * 3 + 1] + gps[i * 3 + 2] * 2;
}
}
catch (const std::runtime_error& e)
{
var.getGenotypes(gts);
for (int i = 0; i < nsamples; i++)
ds[i] = gts[i * 2] + gts[i * 2 + 1];
}
var.setFORMAT("DS", ds);
bw.writeRecord(var);
Expand Down
12 changes: 11 additions & 1 deletion vcfpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ namespace vcfpp
isFormatVector<T> getFORMAT(std::string tag, T& v)
{
fmt = bcf_get_fmt(header.hdr, line, tag.c_str());
if (!fmt)
throw std::runtime_error("there is no " + tag + " in FORMAT of this variant.\n");
nvalues = fmt->n;
ndst = 0;
S* dst = NULL;
Expand All @@ -421,7 +423,7 @@ namespace vcfpp
}
else
{
throw std::runtime_error("couldn't parse the " + tag + " format of this variant.\n");
throw std::runtime_error("failed to parse the " + tag + " format of this variant.\n");
}
}

Expand All @@ -434,6 +436,8 @@ namespace vcfpp
bool getFORMAT(std::string tag, std::vector<std::string>& v)
{
fmt = bcf_get_fmt(header.hdr, line, tag.c_str());
if (!fmt)
throw std::runtime_error("there is no " + tag + " in FORMAT of this variant.\n");
nvalues = fmt->n;
// if ndst < (fmt->n+1)*nsmpl; then realloc is involved
ret = -1, ndst = 0;
Expand Down Expand Up @@ -467,6 +471,8 @@ namespace vcfpp
isInfoVector<T> getINFO(std::string tag, T& v)
{
info = bcf_get_info(header.hdr, line, tag.c_str());
if (!info)
throw std::runtime_error("there is no " + tag + " tag in INFO of this variant.\n");
S* dst = NULL;
ndst = 0;
ret = -1;
Expand Down Expand Up @@ -495,6 +501,8 @@ namespace vcfpp
isScalar<T> getINFO(std::string tag, T& v)
{
info = bcf_get_info(header.hdr, line, tag.c_str());
if (!info)
throw std::runtime_error("there is no " + tag + " tag in INFO of this variant.\n");
// scalar value
if (info->len == 1)
{
Expand Down Expand Up @@ -524,6 +532,8 @@ namespace vcfpp
isString<T> getINFO(std::string tag, T& v)
{
info = bcf_get_info(header.hdr, line, tag.c_str());
if (!info)
throw std::runtime_error("there is no " + tag + " tag in INFO of this variant.\n");
if (info->type == BCF_BT_CHAR)
v = std::string(reinterpret_cast<char*>(info->vptr), info->vptr_len);
else
Expand Down

0 comments on commit a1c6276

Please sign in to comment.