Skip to content

Commit

Permalink
add stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Aug 15, 2024
1 parent 39390bd commit 4e8ede3
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 30 deletions.
18 changes: 10 additions & 8 deletions src/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ inline MatrixType RandomUniform(const Eigen::Index numRows,

struct Options
{
int ichunk{0}, chunksize{10000}, K{2}, C{10}, nadmix{1000}, nimpute{40}, nthreads{1}, seed{999};
int gridsize{1}, refillHaps{0};
int ichunk{0}, chunksize{50000}, K{2}, C{10}, nadmix{1000}, nimpute{40}, nthreads{1}, seed{999};
int gridsize{1}, refillHaps{0}, buffer{0};
double ltol{1e-1}, info{0}, tol_pi{0.99}, tol_r{1e-5};
double ptol{1e-6}; // threshold for P
double ftol{1e-6}; // threshold for F
Expand Down Expand Up @@ -140,7 +140,8 @@ struct Pars
P = MyFloat1D(iP.data(), iP.data() + iP.size());
Q = MyFloat1D(iQ.data(), iQ.data() + iQ.size());
er = MyFloat1D(ier.data(), ier.data() + ier.size());
for(size_t k = 0; k < iF.size(); k++) F.emplace_back(MyFloat1D(iF[k].data(), iF[k].data() + iF[k].size()));
for(size_t k = 0; k < iF.size(); k++)
F.emplace_back(MyFloat1D(iF[k].data(), iF[k].data() + iF[k].size()));
}
int K, C, M, N;
MyFloat1D P, Q;
Expand Down Expand Up @@ -367,8 +368,8 @@ inline MyArr1D get_emission_by_site(const MyArr1D & gli, const MyArr1D & P, doub
{
for(g2 = 0; g2 <= 1; g2++)
{
emit(z12) +=
gli(g1 + g2) * (g1 * P(z1) + (1 - g1) * (1 - P(z1))) * (g2 * P(z2) + (1 - g2) * (1 - P(z2)));
emit(z12) += gli(g1 + g2) * (g1 * P(z1) + (1 - g1) * (1 - P(z1)))
* (g2 * P(z2) + (1 - g2) * (1 - P(z2)));
}
}
}
Expand Down Expand Up @@ -453,8 +454,8 @@ inline auto forward_backwards_diploid(const MyArr2D & emit, const MyArr2D & R, c
{
z12 = z1 * C + z2;
alpha(z12, s) = emit(z12, s)
* (alpha(z12, s - 1) * R(0, s) + PI(z1, s) * sumTmp1(z2) + PI(z2, s) * sumTmp1(z1)
+ PI(z1, s) * PI(z2, s) * constTmp);
* (alpha(z12, s - 1) * R(0, s) + PI(z1, s) * sumTmp1(z2)
+ PI(z2, s) * sumTmp1(z1) + PI(z1, s) * PI(z2, s) * constTmp);
}
}
cs(s) = 1.0 / alpha.col(s).sum();
Expand Down Expand Up @@ -483,7 +484,8 @@ inline auto forward_backwards_diploid(const MyArr2D & emit, const MyArr2D & R, c
{
z12 = z1 * C + z2;
// apply scaling
beta(z12, s) = (beta_mult_emit(z12) * R(0, s + 1) + sumTmp1(z1) + sumTmp1(z2) + constTmp) * cs(s + 1);
beta(z12, s) =
(beta_mult_emit(z12) * R(0, s + 1) + sumTmp1(z1) + sumTmp1(z2) + constTmp) * cs(s + 1);
}
}
}
Expand Down
50 changes: 28 additions & 22 deletions src/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,28 +488,6 @@ void chunk_beagle_genotype_likelihoods(const std::unique_ptr<BigAss> & genome, c
// now evenly split last two chunks of each chromosome
}

void update_bigass_inplace(const std::unique_ptr<BigAss> & genome)
{
int ic, ndiff;
for(ic = 0, genome->nsnps = 0; ic < genome->nchunks; ic++) genome->nsnps += genome->pos[ic].size();
for(ic = 0; ic < genome->nchunks; ic++)
{
if(ic == 0) continue; // assume the first chunksize is not greater than the defined
if((int)genome->pos[ic - 1].size() < genome->chunksize)
{
ndiff = genome->chunksize - genome->pos[ic - 1].size();
if((int)genome->pos[ic].size() >= ndiff)
{
genome->pos[ic - 1].insert(genome->pos[ic - 1].end(), genome->pos[ic].begin(),
genome->pos[ic].begin() + ndiff);
Int1D(genome->pos[ic].begin() + ndiff, genome->pos[ic].end()).swap(genome->pos[ic]);
}
else
break;
}
}
}

size_t count_lines(std::string fpath)
{
std::ifstream ifs(fpath);
Expand Down Expand Up @@ -627,3 +605,31 @@ void init_bigass(const std::unique_ptr<BigAss> & genome, const Options & opts)
cao.error("number of grids should be same as snps if B=1");
cao.done(tim.date(), "elapsed time for parsing beagle file", std::fixed, tim.reltime(), " secs");
}

/*
* if buffer is desired, re-split chunks
*/
void update_bigass(const std::unique_ptr<BigAss> & genome, const Options & opts)
{
assert(opts.buffer > 0);
size_t s = opts.buffer * genome->nsamples * 3;
for(int ic = 0; ic < genome->nchunks; ic++)
{
assert(opts.buffer < genome->pos[ic].size());
if(ic < genome->nchunks - 1)
{

genome->pos[ic].insert(genome->pos[ic].end(), genome->pos[ic + 1].begin(),
genome->pos[ic + 1].begin() + opts.buffer);
genome->gls[ic].insert(genome->gls[ic].end(), genome->gls[ic + 1].begin(),
genome->gls[ic + 1].end() + s);
}
else
{
genome->pos[ic].insert(genome->pos[ic].begin(), genome->pos[ic - 1].end() - opts.buffer,
genome->pos[ic - 1].end());
genome->gls[ic].insert(genome->gls[ic].begin(), genome->gls[ic - 1].end() - s,
genome->gls[ic - 1].end());
}
}
}
1 change: 1 addition & 0 deletions src/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@ std::string convert_geno2like(std::vector<uint8_t> bed,
std::vector<std::string> marker,
const uint64_t nsamples);
void init_bigass(const std::unique_ptr<BigAss> & genome, const Options & opts);
void update_bigass(const std::unique_ptr<BigAss> & genome, const Options & opts);

#endif // PHASELESS_IO_H_

0 comments on commit 4e8ede3

Please sign in to comment.