Skip to content

Commit

Permalink
test refill haps 0.4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Jan 31, 2024
1 parent 0fcf704 commit c76a1f7
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 26 deletions.
32 changes: 28 additions & 4 deletions R/plot_haplotypes.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @export
plot_gamma <- function(gammaC, sites = NULL, title="") {
plotGamma <- function(gammaC, sites = NULL, ...) {
N <- length(gammaC)
C <- nrow(gammaC[[1]])
M <- ncol(gammaC[[1]])
Expand All @@ -8,9 +8,7 @@ plot_gamma <- function(gammaC, sites = NULL, title="") {
} else {
sites <- 1:M
}
plot(0, 0, col = "white", axes=FALSE, xlim = c(0, M), ylim = c(1, N + 1),
xlab = "", ylab = "",
cex.lab = 1.5, cex.main = 2.0, main = title)
plot(0, 0, col = "white", axes=FALSE, xlim = c(0, M), ylim = c(1, N + 1),...)
d <- 1
xleft <- 1:M - d
xright <- 1:M - d
Expand All @@ -25,3 +23,29 @@ plot_gamma <- function(gammaC, sites = NULL, title="") {
}
}

#' @export
plotHapFreqWithPhysicalPos <- function(K,
pos,
hapfreq,
...) {
##
colStore <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
nCols <- length(colStore)
nGrids <- length(pos)
sum <- array(0, nGrids)
xlim <- range(pos)
ylim <- c(0, 1)
## OK so if there are grids, use the grid points
plot(x = 0, y = 0, xlim = xlim, ylim = ylim, axes = FALSE, ...)
x <- c(pos[1], pos, pos[length(pos):1])
m <- array(0, c(nGrids, K + 1))
for(i in 1:K) {
m[, i + 1] <- m[, i] + hapfreq[i, ]
}
for(i in K:1) {
polygon(
x = x, y = c(m[1, i], m[, i + 1], m[nGrids:1, i]),
xlim = xlim, ylim = ylim, col = colStore[(i %% nCols) + 1]
)
}
}
63 changes: 42 additions & 21 deletions src/fastphase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,26 @@ void FastPhaseK2::setFlags(double tol_p, double tol_f, double tol_q, bool debug_
NR = nR;
}

void FastPhaseK2::refillHaps()
{
// bin hapsum per 100 snps ?
for(int c = 0; c < C; c++)
{
for(int m = 0; m < M; m++)
{
if(HapSum(c, m) >= minHapfreq) continue;
MyArr1D h = HapSum.col(m);
h(c) = 0; // do not re-sample current
h /= h.sum();
MyFloat1D p(h.data(), h.data() + h.size());
std::discrete_distribution<int> distribution{p.begin(), p.end()};
int choice = distribution(rng);
assert(choice != c);
F(m, c) = F(m, choice);
}
}
}

void FastPhaseK2::initIteration()
{
// initial temp variables
Expand All @@ -51,6 +71,21 @@ void FastPhaseK2::initIteration()
HapSum.setZero(C, M); // reset post(Z,j)
}

void FastPhaseK2::updateIteration()
{
// update R
if(!NR) er = 1.0 - Ezj.colwise().sum() / N;
// update F
if(!NP) F = (Ezg2 / (Ezg1 + Ezg2)).transpose();
// update PI
if(!NF)
{
PI = Ezj;
PI.rowwise() /= PI.colwise().sum();
}
protectPars();
}

void FastPhaseK2::protectPars()
{
// protect F
Expand Down Expand Up @@ -92,21 +127,6 @@ void FastPhaseK2::protectPars()
HapSum.rowwise() /= HapSum.colwise().sum();
}

void FastPhaseK2::updateIteration()
{
// update R
if(!NR) er = 1.0 - Ezj.colwise().sum() / N;
// update F
if(!NP) F = (Ezg2 / (Ezg1 + Ezg2)).transpose();
// update PI
if(!NF)
{
PI = Ezj;
PI.rowwise() /= PI.colwise().sum();
}
protectPars();
}

/*
** @param GL genotype likelihood of all individuals in snp major form
** @param ind current individual i
Expand Down Expand Up @@ -221,11 +241,12 @@ int run_impute_main(Options & opts)
prevlike = loglike;
cao.print(tim.date(), "run whole genome, iteration", it, ", likelihoods =", loglike, ", diff =", diff, ", time",
tim.reltime(), " sec");
if(diff < opts.ltol)
{
cao.print(tim.date(), "hit stopping criteria, diff =", std::scientific, diff, " <", opts.ltol);
break;
}
// if(diff < opts.ltol)
// {
// cao.print(tim.date(), "hit stopping criteria, diff =", std::scientific, diff, " <", opts.ltol);
// break;
// }
if(it > 4 && it < 30 && it % 4 == 1) faith.refillHaps();
}
// reuse Ezj for AE
if(opts.eHap)
Expand Down Expand Up @@ -262,7 +283,7 @@ int run_impute_main(Options & opts)
orecomb << faith.R.transpose().format(fmt) << "\n";
std::ofstream opi(opts.out + ".pi");
opi << faith.PI.transpose().format(fmt) << "\n";
std::ofstream ohap(opts.out + ".hapsum");
std::ofstream ohap(opts.out + ".hapfreq");
ohap << faith.HapSum.transpose().format(fmt) << "\n";
std::ofstream oae(opts.out + ".ae");
oae << faith.Ezj.transpose().format(fmt) << "\n";
Expand Down
2 changes: 2 additions & 0 deletions src/fastphase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class FastPhaseK2
double alleleEmitThreshold{1e-6}; // threshold for P
double clusterFreqThreshold{1e-6}; // threshold for F
double admixtureThreshold{1e-6}; // threshold for Q
double minHapfreq{0.01}; // min haplotype frequency, or min(1/(10*C), 1/100)

public:
FastPhaseK2(int n, int m, int c, int seed) : N(n), M(m), C(c), CC(c * c)
Expand Down Expand Up @@ -55,6 +56,7 @@ class FastPhaseK2

void initRecombination(const Int2D & pos, std::string rfile = "", int B = 1, double Ne = 20000);
void setFlags(double, double, double, bool, bool, bool, bool, bool);
void refillHaps(); // re-sample F for sites with hapfreq < minHapfreq
void protectPars();
void initIteration();
void updateIteration();
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ int main(int argc, char * argv[])
{
// ========= helper message and parameters parsing ===========================

const std::string VERSION{"0.4.0"};
const std::string VERSION{"0.4.1"};

// below for catching ctrl+c, and dumping files
struct sigaction sa;
Expand Down

0 comments on commit c76a1f7

Please sign in to comment.