diff --git a/src/common.hpp b/src/common.hpp index c9749be..05b722f 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -96,8 +96,8 @@ inline MatrixType RandomUniform(const Eigen::Index numRows, struct Options { - int ichunk{0}, chunksize{10000}, K{2}, C{10}, nadmix{1000}, nimpute{40}, nthreads{1}, seed{999}; - int gridsize{1}, refillHaps{0}; + int ichunk{0}, chunksize{50000}, K{2}, C{10}, nadmix{1000}, nimpute{40}, nthreads{1}, seed{999}; + int gridsize{1}, refillHaps{0}, buffer{0}; double ltol{1e-1}, info{0}, tol_pi{0.99}, tol_r{1e-5}; double ptol{1e-6}; // threshold for P double ftol{1e-6}; // threshold for F @@ -140,7 +140,8 @@ struct Pars P = MyFloat1D(iP.data(), iP.data() + iP.size()); Q = MyFloat1D(iQ.data(), iQ.data() + iQ.size()); er = MyFloat1D(ier.data(), ier.data() + ier.size()); - for(size_t k = 0; k < iF.size(); k++) F.emplace_back(MyFloat1D(iF[k].data(), iF[k].data() + iF[k].size())); + for(size_t k = 0; k < iF.size(); k++) + F.emplace_back(MyFloat1D(iF[k].data(), iF[k].data() + iF[k].size())); } int K, C, M, N; MyFloat1D P, Q; @@ -367,8 +368,8 @@ inline MyArr1D get_emission_by_site(const MyArr1D & gli, const MyArr1D & P, doub { for(g2 = 0; g2 <= 1; g2++) { - emit(z12) += - gli(g1 + g2) * (g1 * P(z1) + (1 - g1) * (1 - P(z1))) * (g2 * P(z2) + (1 - g2) * (1 - P(z2))); + emit(z12) += gli(g1 + g2) * (g1 * P(z1) + (1 - g1) * (1 - P(z1))) + * (g2 * P(z2) + (1 - g2) * (1 - P(z2))); } } } @@ -453,8 +454,8 @@ inline auto forward_backwards_diploid(const MyArr2D & emit, const MyArr2D & R, c { z12 = z1 * C + z2; alpha(z12, s) = emit(z12, s) - * (alpha(z12, s - 1) * R(0, s) + PI(z1, s) * sumTmp1(z2) + PI(z2, s) * sumTmp1(z1) - + PI(z1, s) * PI(z2, s) * constTmp); + * (alpha(z12, s - 1) * R(0, s) + PI(z1, s) * sumTmp1(z2) + + PI(z2, s) * sumTmp1(z1) + PI(z1, s) * PI(z2, s) * constTmp); } } cs(s) = 1.0 / alpha.col(s).sum(); @@ -483,7 +484,8 @@ inline auto forward_backwards_diploid(const MyArr2D & emit, const MyArr2D & R, c { z12 = z1 * C + z2; // apply scaling - beta(z12, s) = (beta_mult_emit(z12) * R(0, s + 1) + sumTmp1(z1) + sumTmp1(z2) + constTmp) * cs(s + 1); + beta(z12, s) = + (beta_mult_emit(z12) * R(0, s + 1) + sumTmp1(z1) + sumTmp1(z2) + constTmp) * cs(s + 1); } } } diff --git a/src/io.cpp b/src/io.cpp index d6491c8..3ce6bf2 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -488,28 +488,6 @@ void chunk_beagle_genotype_likelihoods(const std::unique_ptr & genome, c // now evenly split last two chunks of each chromosome } -void update_bigass_inplace(const std::unique_ptr & genome) -{ - int ic, ndiff; - for(ic = 0, genome->nsnps = 0; ic < genome->nchunks; ic++) genome->nsnps += genome->pos[ic].size(); - for(ic = 0; ic < genome->nchunks; ic++) - { - if(ic == 0) continue; // assume the first chunksize is not greater than the defined - if((int)genome->pos[ic - 1].size() < genome->chunksize) - { - ndiff = genome->chunksize - genome->pos[ic - 1].size(); - if((int)genome->pos[ic].size() >= ndiff) - { - genome->pos[ic - 1].insert(genome->pos[ic - 1].end(), genome->pos[ic].begin(), - genome->pos[ic].begin() + ndiff); - Int1D(genome->pos[ic].begin() + ndiff, genome->pos[ic].end()).swap(genome->pos[ic]); - } - else - break; - } - } -} - size_t count_lines(std::string fpath) { std::ifstream ifs(fpath); @@ -627,3 +605,31 @@ void init_bigass(const std::unique_ptr & genome, const Options & opts) cao.error("number of grids should be same as snps if B=1"); cao.done(tim.date(), "elapsed time for parsing beagle file", std::fixed, tim.reltime(), " secs"); } + +/* + * if buffer is desired, re-split chunks + */ +void update_bigass(const std::unique_ptr & genome, const Options & opts) +{ + assert(opts.buffer > 0); + size_t s = opts.buffer * genome->nsamples * 3; + for(int ic = 0; ic < genome->nchunks; ic++) + { + assert(opts.buffer < genome->pos[ic].size()); + if(ic < genome->nchunks - 1) + { + + genome->pos[ic].insert(genome->pos[ic].end(), genome->pos[ic + 1].begin(), + genome->pos[ic + 1].begin() + opts.buffer); + genome->gls[ic].insert(genome->gls[ic].end(), genome->gls[ic + 1].begin(), + genome->gls[ic + 1].end() + s); + } + else + { + genome->pos[ic].insert(genome->pos[ic].begin(), genome->pos[ic - 1].end() - opts.buffer, + genome->pos[ic - 1].end()); + genome->gls[ic].insert(genome->gls[ic].begin(), genome->gls[ic - 1].end() - s, + genome->gls[ic - 1].end()); + } + } +} diff --git a/src/io.hpp b/src/io.hpp index 45ca40e..a07ebe9 100644 --- a/src/io.hpp +++ b/src/io.hpp @@ -39,5 +39,6 @@ std::string convert_geno2like(std::vector bed, std::vector marker, const uint64_t nsamples); void init_bigass(const std::unique_ptr & genome, const Options & opts); +void update_bigass(const std::unique_ptr & genome, const Options & opts); #endif // PHASELESS_IO_H_