From 6dd93419adf0b3c2902417f6b8b6f294f5266160 Mon Sep 17 00:00:00 2001 From: Zilong-Li Date: Wed, 7 Feb 2024 15:05:10 +0100 Subject: [PATCH] update r package --- NAMESPACE | 6 +++- R/plot_haplotypes.R | 63 ++++++++++++++++++++++------------------- src/parse-phaseless.cpp | 22 ++++++++------ 3 files changed, 53 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4dfd139..3a05a58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,13 @@ # Generated by roxygen2: do not edit by hand +S3method(plot,admixQ) +S3method(plot,gamma) +S3method(plot,hapfreq) +export(admix.alignKStephens) +export(admix.plotQ) export(parse_impute_opt) export(parse_impute_par) export(parse_joint_par) export(parse_joint_post) -export(plot_gamma) importFrom(Rcpp,sourceCpp) useDynLib(phaseless, .registration = TRUE) diff --git a/R/plot_haplotypes.R b/R/plot_haplotypes.R index a7b2454..f762845 100644 --- a/R/plot_haplotypes.R +++ b/R/plot_haplotypes.R @@ -1,8 +1,9 @@ #' @export -plotGamma <- function(gammaC, sites = NULL, ...) { - N <- length(gammaC) - C <- nrow(gammaC[[1]]) - M <- ncol(gammaC[[1]]) +plot.gamma <- function(gamma, sites = NULL, ...) { + stopifnot(is.list(gamma)) + N <- length(gamma) + C <- nrow(gamma[[1]]) + M <- ncol(gamma[[1]]) if(!is.null(sites) & is.vector(sites) & length(sites) < M) { M <- length(sites) } else { @@ -16,36 +17,40 @@ plotGamma <- function(gammaC, sites = NULL, ...) { ytop <- i + array(0, M) ybottom <- i + array(0, M) for(c in 1:C) { - ytop <- ytop + gammaC[[i]][c, sites] + ytop <- ytop + gamma[[i]][c, sites] rect(xleft = xleft - d, xright = xright + d, ybottom = ybottom, ytop = ytop, col = c, lwd = 0, border = NA) ybottom <- ytop } } } + #' @export -plotHapFreqWithPhysicalPos <- function(K, - pos, - hapfreq, - ...) { - ## - colStore <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") - nCols <- length(colStore) - nGrids <- length(pos) - sum <- array(0, nGrids) - xlim <- range(pos) - ylim <- c(0, 1) - ## OK so if there are grids, use the grid points - plot(x = 0, y = 0, xlim = xlim, ylim = ylim, axes = FALSE, ...) - x <- c(pos[1], pos, pos[length(pos):1]) - m <- array(0, c(nGrids, K + 1)) - for(i in 1:K) { - m[, i + 1] <- m[, i] + hapfreq[i, ] - } - for(i in K:1) { - polygon( - x = x, y = c(m[1, i], m[, i + 1], m[nGrids:1, i]), - xlim = xlim, ylim = ylim, col = colStore[(i %% nCols) + 1] - ) - } +plot.hapfreq <- function(hapfreq, + pos, + recomb = NULL, + colors = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"), + ...) { + stopifnot(is.matrix(hapfreq), is.vector(pos)) + nCols <- length(colors) + nGrids <- length(pos) + K <- nrow(hapfreq) + sum <- array(0, nGrids) + xlim <- range(pos) + ylim <- c(0, 1) + ## OK so if there are grids, use the grid points + plot(x = 0, y = 0, xlim = xlim, ylim = ylim, axes = FALSE, ...) + x <- c(pos[1], pos, pos[length(pos):1]) + m <- array(0, c(nGrids, K + 1)) + for(i in 1:K) { + m[, i + 1] <- m[, i] + hapfreq[i, ] + } + for(i in K:1) { + polygon( + x = x, y = c(m[1, i], m[, i + 1], m[nGrids:1, i]), + xlim = xlim, ylim = ylim, col = colors[(i %% nCols) + 1] + ) + } + if(!is.null(recomb)) + lines(pos[-1], recomb, type = "l", col = "red") } diff --git a/src/parse-phaseless.cpp b/src/parse-phaseless.cpp index 703f673..5f9f165 100644 --- a/src/parse-phaseless.cpp +++ b/src/parse-phaseless.cpp @@ -59,7 +59,7 @@ List parse_joint_post(std::string filename, int chunk = 0) for(int ind = 0; ind < par->N; ind++) { Eigen::Map gli(par->gls[ic].data() + ind * S * 3, S, 3); - MyArr2D emit = get_emission_by_gl(gli, P.middleRows(pos_chunk - S, S)).transpose(); // CC x S + MyArr2D emit = get_emission_by_gl(gli, P.middleRows(pos_chunk - S, S)); // first get H ie old PI in fastphase MyArr2D H = MyArr2D::Zero(C, S); int z1, y1, s; // m * C + z1 @@ -166,22 +166,28 @@ List parse_impute_par(std::string filename, int ic = -1) for(int ind = 0; ind < genome->nsamples; ind++) ids.push_back(ind); int nchunks = ic < 0 ? genome->nchunks : 1; int N = ids.size(); - List ret(N); const int C = genome->C; + Bool1D collapse = Bool1D::Constant(genome->nsnps, false); + int j{0}; + for(auto c : genome->collapse) collapse(j++) = (c == 1); + List ret(N); for(auto ind : ids) { List gamma(nchunks); - for(int c = 0; c < nchunks; c++) { + for(int c = 0, ss = 0; c < nchunks; c++) { ic = nchunks > 1 ? c : std::max(ic, c); const int S = genome->pos[ic].size(); - const int nGrids = genome->B > 1 ? (S + genome->B - 1) / genome->B : S; + const auto se = find_grid_start_end(collapse.segment(ss, S)); + const int G = se.size(); Eigen::Map gli(genome->gls[ic].data() + ind * S * 3, S, 3); - Eigen::Map P(genome->F[ic].data(), S, C); - Eigen::Map PI(genome->PI[ic].data(), C, S); - Eigen::Map R(genome->R[ic].data(), 3, S); - MyArr2D emit = get_emission_by_gl(gli, P).transpose(); // CC x S + Eigen::Map P(genome->P[ic].data(), S, C); + Eigen::Map PI(genome->PI[ic].data(), C, G); + Eigen::Map R(genome->R[ic].data(), 3, G); + // Eigen::Map AE(genome->AE[ic].data(), C, G); + MyArr2D emit = get_emission_by_grid(gli, P, collapse.segment(ss, S)); const auto [alpha, beta, cs] = forward_backwards_diploid(emit, R, PI); gamma[c] = alpha * beta; + ss += S; } ret[ind] = gamma; }