diff --git a/DESCRIPTION b/DESCRIPTION index ee2b32c..c66c6be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: TAPACLOTH +Package: INCOMMON Title: Targeted panel-oriented test for clonality and heterozygosity Version: 0.0.1 Author: Nicola Calonaci @@ -8,15 +8,16 @@ Authors@R: comment = c(ORCID = "0000-0003-2721-4888")), person("Giulio", "Caravagna", , "gcaravag@gmail.com", role = c("aut", "cre"), comment = c(ORCID = ""))) -Description: TAPACLOTH is a tool that classifies somatic mutation calls from - targeted-panel sequencing as either subclonal, clonal heterozygous or clonal - LOH thorugh a statistical hypothesis test based on a Binomial or Beta-Binomial - modelling of the sequencing read count process. As an additional feature, - TAPACLOTH can be used to infer sample purity. +Description: INCOMMON is a tool for the INference of COpy number and Mutation Multiplicity in ONcology. + INCOMMON infers the copy number and multiplicity of somatic mutations from tumour-only read count data, + and can be applied to classify mutations from large-size datasets in an efficient and fast way. + Mutations are classified as either Tier-1 (present in 100% cells) without copy-number alterations + (heterozygous mutant diploid HMD), with loss of heterozygosity (LOH), copy-neutral LOH (CNLOH), amplification (AM), + or Tier-2 (subclonal or with high ploidy and low multiplicity). License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.1 Suggests: knitr, rmarkdown, @@ -36,8 +37,8 @@ Imports: Remotes: caravagnalab/BMix, caravagnalab/CNAqc -URL: https://github.com/caravagnalab/TAPACLOTH -BugReports: https://github.com/caravagnalab/TAPACLOTH/issues +URL: https://github.com/caravagnalab/INCOMMON +BugReports: https://github.com/caravagnalab/INCOMMON/issues VignetteBuilder: knitr Depends: R (>= 2.10) diff --git a/LICENSE b/LICENSE index eca47de..30b1978 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ YEAR: 2022 -COPYRIGHT HOLDER: TAPACLOTH authors +COPYRIGHT HOLDER: INCOMMON authors diff --git a/NAMESPACE b/NAMESPACE index 4a1376a..596a1fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,41 +1,23 @@ # Generated by roxygen2: do not edit by hand -S3method(print,TAPACLOTH) -export(binomial_test) -export(estimate_purity) -export(gene_label) -export(gene_entropy) -export(get_DP) -export(get_NV) -export(get_VAF) -export(get_classes) -export(get_classifier) -export(get_coord) -export(get_gene) -export(get_id) -export(get_params) -export(get_purity_bmix) -export(get_purity_estimate) -export(get_reliability) +S3method(plot,INCOMMON) +S3method(print,INCOMMON) +export(classification) +export(classify) +export(compute_posterior) export(init) -export(models_avail) -export(plot.TAPACLOTH) -export(plot_bmix) +export(parameters) +export(plot_classification) export(plot_gene) -export(plot_heatmap) export(plot_summary) -export(plot_test) -export(run_classifier) -export(test_setup) +export(posterior) import(CNAqc) -import(VGAM) import(cli) import(crayon) import(ggplot2) import(ggridges) import(ggsci) import(pheatmap) -import(stringr) importFrom(dplyr,"%>%") importFrom(dplyr,filter) importFrom(dplyr,mutate) diff --git a/R/binomial_test.R b/R/binomial_test.R deleted file mode 100644 index c5c5227..0000000 --- a/R/binomial_test.R +++ /dev/null @@ -1,183 +0,0 @@ -#' Compute likelihoods and uncertainty.. -#' -#' @param test Number of reads with the variant. -#' @param DP Sequencing coverage of the mutated genome site. -#' @param purity Purity of the sample. -#' @param cutoff Likelihood cut-off for class assignment. -#' @param model Model used for the classification task, either "Binomial" (no over-dispersion) -#' or "Beta-Binomial" (over-dispersion included), that will be used as the expected -#' distribution for the number of reads with variant at fixed coverage and purity. -#' @param rho If "Beta-Binomial" model is selected, this parameter tunes the over-dispersion -#' of the expected distribution used for the classification. -#' @param karyotypes Karyotypes to be included among the possible classes. -#' @return A tibble including ploidy, multiplicity, proportion of wt alleles, -#' uncertainty of the class assignment, model density. -#' @export -#' @importFrom dplyr filter mutate rename select %>% -#' @examples -#' binomial_test(test = 220, -#' DP = 500, -#' purity = 0.98, -#' cutoff = 0.9, -#' model = "Beta-Binomial", -#' rho = 0.01, -#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") -#') - -binomial_test = function(test, - DP, - gene, - priors = "NULL", - purity, - cutoff, - rho = 0.01, - karyotypes, - assign_extremes - ) -{ - NV_x = 1:DP - - # Density - db = function(Major, minor, priors, gene) - { - peaks = CNAqc:::expected_vaf_peak(Major, minor, purity)$peak - - lapply(peaks %>% seq_along, function(p) { - label = paste0(Major + minor, 'N (Mutated: ', p, "N)") - - out = data.frame( - density = compute_density(NV_x, DP, prob = peaks[p], rho), - NV = NV_x, - Major = Major, - minor = minor, - ploidy = Major + minor, - multiplicity = p, - karyotype = paste0(Major, ":", minor), - label = label, - peak = peaks[p] - ) - - if (!is.null(priors)) - out$density = out$density * get_prior(priors, gene, label) - - out - - }) %>% - Reduce(f = bind_rows) - } - - cut = function(x, cutoff) - { - - ## Normalize likelihoods by maximum and compute corresponding cut-offs - x = x %>% - group_by(label) %>% - reframe( - density, - NV, - ploidy, - multiplicity, - label, - peak, - cutoff = max(density) * cutoff - ) %>% - unique() %>% - ungroup() - - ## Compute assignment probability (relative likelihood) for each class - - x = x %>% - group_by(NV) %>% - reframe( - density, - ploidy, - multiplicity, - peak, - cutoff, - label, - p_assign = density / sum(density), - entropy = 1 - (density / sum(density)) - # entropy = 1 - max(density) / sum(density) - ) %>% - ungroup() - - ## Add out-of-sample label for densities lower than cut-offs - x = x %>% - mutate(label = ifelse(density < cutoff, "out of sample", label)) - x - } - - if(is.na(purity)){ - cli_alert_warning(text = - "With purity {.field {purity}} classification is not possible." - ) - return(tibble(ploidy = NA, - multiplicity = NA, - entropy = NA, - label = NA, - density = list(NULL))) - } - - dataset = lapply(karyotypes, function(k) { - alleles = strsplit(k, split = ":")[[1]] %>% as.integer() - db(alleles[1],alleles[2], priors, gene) - }) %>% bind_rows() %>% cut(cutoff) - - # Compute mean entropy over in-sample data - - mean_entropy = dataset %>% - filter(label != "out of sample") %>% - group_by(NV) %>% - summarise(u = min(entropy)) %>% - pull(u) %>% - mean() - - - tested = dataset %>% - maximise() %>% - dplyr::filter(NV == test) - - if(nrow(tested)>1){ - cli_alert_warning(text = - "With purity {.field {purity}} classes {.field {tested$label}} have the same likelihood" - ) - cli_alert_warning(text = - "Simplest case will be selected: {.field {tested$label[1]}}" - ) - - tested = tested[1.,] - } - - # Optionally assign NVs lower than minimum (larger than maximum) accepted to - # Subclonal/Higher Ploidy (Higher Ploidy and Multiplicity) - if(tested$label == "out of sample" & assign_extremes){ - min_in_sample = dataset %>% - dplyr::filter(label != "out of sample") %>% - dplyr::filter(NV == min(NV)) %>% - pull(NV) %>% unique() - - max_in_sample = dataset %>% - dplyr::filter(label != "out of sample") %>% - dplyr::filter(NV == max(NV)) %>% - pull(NV) %>% unique() - - # tested$label = ifelse(tested$NV < min_in_sample | tested$NV > max_in_sample, - # paste0(tested$ploidy,'N (Mutated: ', tested$multiplicity,"N)"), - # tested$label) - # tested$label = ifelse(tested$NV < min_in_sample | tested$NV > max_in_sample, - # paste0(tested$ploidy,'N (Mutated: ', tested$multiplicity,"N)"), - # tested$label) - if(tested$NV < min_in_sample) tested$label = "SUB/HPLM" - if(tested$NV > max_in_sample) tested$label = "HPHM" - } - - - return(tibble(ploidy = tested$ploidy, - multiplicity = tested$multiplicity, - p_assign = tested$p_assign, - entropy = tested$entropy, - label = tested$label, - density = list(dataset), - mean_entropy = mean_entropy)) -} - \ No newline at end of file diff --git a/R/classes.R b/R/classes.R deleted file mode 100644 index d970cc3..0000000 --- a/R/classes.R +++ /dev/null @@ -1,29 +0,0 @@ -classes = function(x, model = "binomial") { - stopifnot(inherits(x, "TAPACLOTH")) - if ((model %>% tolower()) %in% c("binomial", "beta-binomial", "terzile")) - { - x$data$data %>% - cbind(x$classifier[[model]]$data) %>% - as_tibble() %>% - print() - } - else{ - cli::cli_alert("No classification using model {.format {model}}") - } -} - -purity_per_sample = function(x) { - stopifnot(inherits(x, "TAPACLOTH")) - - for (model in x$purity_estimate %>% names()) { - if ((model %>% tolower()) %in% c("binomial", "beta-binomial")) - { - x$data$purity %>% - dplyr::rename(input_purity = purity) %>% - full_join(x$purity_estimate[[model]]$reliability, by = "sample") %>% - full_join(x$purity_estimate[[model]]$purity, by = "sample") %>% - print() - } - - } -} \ No newline at end of file diff --git a/R/classify.R b/R/classify.R new file mode 100644 index 0000000..cc834d8 --- /dev/null +++ b/R/classify.R @@ -0,0 +1,116 @@ +#' Classify mutations using a Beta-Binomial model-based test. +#' +#' @param x An object of class \code{'INCOMMON'} generated with function `init`. +#' @param priors +#' @param entropy_cutoff Entropy cut-off for Tier-1 vs Tier-2 assignment. +#' @param rho Over-dispersion parameter. +#' @param karyotypes Karyotypes to be included among the possible classes. +#' @return An object of class `INCOMMON` containing the original input plus +#' the classification data and parameters. +#' @export +#' @importFrom dplyr filter mutate rename select %>% +#' @examples +#' x = init(mutations = example_data$data, +#' sample = example_data$sample, +#' purity = example_data$purity, +#' tumor_type = example_data$tumor_type) +#' x = classify( +#' x = x, +#' priors = priors, +#' entropy_cutoff = 0.2, +#' rho = 0.01, +#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") +#' ) +#' print(x) +classify = function(x, + priors = NULL, + entropy_cutoff = 0.2, + rho = 0.01, + karyotypes = c("1:0", "1:1", "2:0", "2:1", "2:2") + +) + { + stopifnot(inherits(x, "INCOMMON")) + if(is.null(entropy_cutoff)) entropy_cutoff = 1 + + # Output + + output = x + + if (!("fit" %in% names(output))) + output$fit = list() + + cli::cli_h1( + "INCOMMON inference of copy number and mutation multiplicity for sample {.field {x$sample}}" + ) + cat("\n") + + check_input(x) + + cli::cli_alert_info("Performing classification") + + x = idify(x) + + tests = lapply(ids(x), function(id) { + # Control for duplicates + if(info(x, mutation_id = id) %>% nrow() > 1){ + cli_alert_warning(text = "More than one mutation mapped at: {.field {id}}") + info(x, id) + + cli_alert_warning(text = "Keeping first row by default (check your input data)") + w = which(data(x)$id==id) + x$data = x$data[-w[2:length(w)],] + } + + # Compute model likelihood, posterior and entropy + + compute_posterior( + NV = NV(x, id), + DP = DP(x, id), + gene = gene(x, id), + priors = priors, + tumor_type = tumor_type(x), + purity = purity(x), + entropy_cutoff = entropy_cutoff, + rho = rho, + karyotypes = karyotypes, + ) + }) + + names(tests) = ids(x) + + # Maximum a posteriori classification + map_estimates = lapply(ids(x), function(id){ + + map = tests[[id]] %>% + dplyr::group_by(NV) %>% + dplyr::filter(value == max(value)) %>% + dplyr::filter(NV == NV(x, id)) %>% + dplyr::ungroup() + + if(nrow(map)>1){ + cli_alert_warning(text = + "With purity {.field {purity(x)}} karyotype {.field {map$karyotype}} with multiplicities {.field {map$multiplicity}} have the same likelihood" + ) + cli_alert_warning(text = + "Simplest case will be selected: {.field {map$karyotype[1]}}" + ) + + map = map[1.,] + } + + map %>% dplyr::select(label, state, value, entropy) %>% dplyr::rename(posterior = value) +}) %>% do.call(rbind, .) + + # Add fit object + output$fit = list( + classification = dplyr::bind_cols(data(x), map_estimates), + params = tibble(entropy_cutoff = entropy_cutoff, + rho = rho), + posterior = tests + ) + + return(output) +} + + diff --git a/R/compute_density.R b/R/compute_likelihood.R similarity index 65% rename from R/compute_density.R rename to R/compute_likelihood.R index da8d54d..3a689be 100644 --- a/R/compute_density.R +++ b/R/compute_likelihood.R @@ -1,4 +1,4 @@ -compute_density = function(NV, DP, prob, rho) { +compute_likelihood = function(NV, DP, prob, rho) { density = VGAM::dbetabinom( x = 1:DP, size = DP, diff --git a/R/compute_posterior.R b/R/compute_posterior.R new file mode 100644 index 0000000..79a2d00 --- /dev/null +++ b/R/compute_posterior.R @@ -0,0 +1,138 @@ +#' Compute model likelihood, posterior and entropy +#' +#' @param NV Number of reads with the variant. +#' @param DP Sequencing coverage of the mutated genome site. +#' @param gene Gene name or symbol. +#' @param priors Prior distribution. +#' @param purity Purity of the sample. +#' @param tumor_type Tumor type of the sample. +#' @param entropy_cutoff Cut-off on entropy for Tier-1/Tier-2 distinction. +#' @param model Model used for the classification task, either "Binomial" (no over-dispersion) +#' or "Beta-Binomial" (over-dispersion included), that will be used as the expected +#' distribution for the number of reads with variant at fixed coverage and purity. +#' @param rho If "Beta-Binomial" model is selected, this parameter tunes the over-dispersion +#' of the expected distribution used for the classification. +#' @param karyotypes Karyotypes to be included among the possible classes. +#' @return A tibble including ploidy, multiplicity, proportion of wt alleles, +#' uncertainty of the class assignment, model density. +#' @export +#' @importFrom dplyr filter mutate rename select %>% +#' @examples +#' binomial_test(test = 220, +#' DP = 500, +#' purity = 0.98, +#' cutoff = 0.9, +#' model = "Beta-Binomial", +#' rho = 0.01, +#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") +#') + +compute_posterior = function(NV, + DP, + gene, + priors = NULL, + tumor_type, + purity, + entropy_cutoff, + rho = 0.01, + karyotypes, + assign_extremes + ) +{ + NV_x = 1:DP + + # Density + db = function(Major, minor, prior, gene) + { + + expected_peaks = CNAqc:::expected_vaf_peak(Major, minor, purity)$peak + + lapply(expected_peaks %>% seq_along(), function(p) { + + # Classification label + label = paste0(Major + minor, 'N (Mutated: ', p, "N)") + + if(!(label %in% priors$label)) cli::cli_alert_danger("Incomplete prior distribution!") + stopifnot(label %in% priors$label) + prior = prior %>% dplyr::filter(label == !!label) %>% dplyr::pull(p) + + # Expected VAF peak of mixture component + expected_peak = expected_peaks[p] + + # Beta-Binomial likelihood distribution + likelihood = compute_likelihood(NV = NV_x, DP = DP, prob = expected_peak, rho = rho) + + # # Posterior distribution + # if (is.null(priors)){ + # prior = 1 + # } else { + # prior = get_prior(priors, gene, tumor_type, label) + # } + + posterior = prior*likelihood + posterior = posterior/sum(posterior) # Normalise + + # Output + out = data.frame( + value = posterior, + NV = NV_x, + Major = Major, + minor = minor, + ploidy = Major + minor, + multiplicity = p, + karyotype = paste0(Major, ":", minor), + label = label, + peak = expected_peak + ) + + out + + }) %>% + do.call(bind_rows, .) + } + + if(is.na(purity)){ + cli_alert_warning(text = + "With purity {.field {purity}} classification is not possible." + ) + return(tibble(ploidy = NA, + multiplicity = NA, + entropy = NA, + label = NA, + density = list(NULL))) + } + + # Compute posterior distribution of 0 < NV <= DP for each component of the mixture (karyotype) + # Prior distribution + if (is.null(priors)){ + prior = 1 + } else { + prior = get_prior(priors, gene, tumor_type) + } + posterior = lapply(karyotypes, function(k) { + alleles = strsplit(k, split = ":")[[1]] %>% as.integer() + db(Major = alleles[1], + minor = alleles[2], + prior = prior, + gene = gene) + }) %>% + bind_rows() + + # Compute entropy + posterior = posterior %>% + dplyr::group_by(NV) %>% + dplyr::reframe(value, NV, Major, minor, ploidy, multiplicity, karyotype, label, peak, entropy = -sum(value*log2(value))) + + # Apply entropy cutoff + posterior = posterior %>% + dplyr::mutate(state = dplyr::case_when( + label %in% c('4N (Mutated: 1N)', '3N (Mutated: 1N)') | entropy > entropy_cutoff ~ 'Tier-2', + entropy <= entropy_cutoff & label %in% c("2N (Mutated: 1N)") ~ "HMD", + entropy <= entropy_cutoff & label %in% c("1N (Mutated: 1N)") ~ "LOH", + entropy <= entropy_cutoff & label %in% c("2N (Mutated: 2N)") ~ "CNLOH", + entropy <= entropy_cutoff & label %in% c("3N (Mutated: 2N)", "4N (Mutated: 2N)") ~ "AM" + )) + + return(posterior) +} + \ No newline at end of file diff --git a/R/estimate_purity.R b/R/estimate_purity.R deleted file mode 100644 index 8bd7a57..0000000 --- a/R/estimate_purity.R +++ /dev/null @@ -1,62 +0,0 @@ -#' Infer sample purity using a mixture of Binomial or Beta-Binomial distributions. -#' -#' @param x An object of class \code{'TAPACLOTH'} generated with function `init`. -#' @param eps In case data is fitted with a mixture of 2 distributions, sample purity -#' is estimated using only that whose peak is closer to the input purity estimate -#' than the value of this parameter. If both distributions satisfy this condition, -#' the one which is closest to the input purity estimate is used for the new estimate. -#' -#' @return An object of class `TAPACLOTH` containing the inferred purity. -#' @export -#' -#' @examples -#' input = init(mutations = example_data$data, sample = example_data$sample, purity = example_data$purity) -#' out = estimate_purity(x = input, model = "binomial", eps = 0.01) -#' print(out) -estimate_purity = function(x, - eps = 0.01) - { - - stopifnot(inherits(x, "TAPACLOTH")) - - # Output - test = x - if (!("purity_estimate" %in% names(test))) - test$purity_estimate = list() - - cli::cli_h1( - "TAPACLOTH purity estimate of sample {.field {get_sample(x)}} using a Beta-binomial model" - ) - cat("\n") - - # Prepare data for BMix - input = data.frame(successes = get_data(x) %>% pull(NV), - trials = get_data(x) %>% pull(DP)) - # Return NA if NVs are less than 3 - if (nrow(input) <= 3) { - cli::cli_alert("There are less than 4 SNVs: purity will not be estimated") - return(test) - } - - fit = BMix::bmixfit(input, K.Binomials = 0, K.BetaBinomials = 1:3) - n_binomials = fit$K["BB"] - peaks = sort(fit$BB.params["mu",]) %>% as.double() - purity_bmix = purity_from_fit( - n_binomials = n_binomials, - peaks = peaks, - purity = get_purity(x), - eps = eps - ) - plot_bmix = BMix::plot.bmix(fit, input) - - # Prepare output - fit$data = input - fit$plot_bmix = BMix::plot.bmix(fit, fit$data) - fit$eps = eps - fit$purity = min(purity_bmix, 1) %>% round(2) - fit$reliability = ifelse(get_purity(x) == 0.0, NA, 1-sqrt(((get_purity(x)-fit$purity)/fit$purity)**2)) - - test$purity_estimate = fit - - return(test) -} diff --git a/R/init.R b/R/init.R index f0937b0..2b1054a 100644 --- a/R/init.R +++ b/R/init.R @@ -6,22 +6,24 @@ #' reference allel `ref`, alternative allele `alt`, integer sequencing depth `DP`, integer number #' of reads with variant `NV`, variant allelic frequency `VAF` and gene name `gene` #' as Hugo Symbol. -#' @param sample Sample name. -#' @param purity Sample purity. +#' @param sample Sample name or ID. +#' @param tumor_type Tumor type of the sample. +#' @param purity Purity of the sample. #' @param gene_roles A tibble reporting a list of `gene` names and associated #' `gene_role` among "oncogene", "TSG" and "fusion". The default is taken from COSMIC Cancer Gene Census. -#' @return An object of class \code{TAPACLOTH}. +#' @return An object of class \code{INCOMMON}. #' @export #' @examples -#' input = init(mutations = example_data$data, sample = example_data$sample, purity = example_data$purity) +#' input = init(mutations = example_data$data, sample = example_data$sample, purity = example_data$purity, tumor_type = example_data$tumor_type) #' print(input) -init = function(mutations, sample, purity, gene_roles = TAPACLOTH::cancer_gene_census){ +init = function(mutations, sample, tumor_type, purity, gene_roles = INCOMMON::cancer_gene_census){ mutations = dplyr::left_join(mutations, gene_roles, by = "gene") out = list( data = mutations, sample = sample, - purity = purity + purity = purity, + tumor_type = tumor_type ) - class(out) = "TAPACLOTH" + class(out) = "INCOMMON" return(out) } diff --git a/R/plot_classification.R b/R/plot_classification.R new file mode 100644 index 0000000..ac7f96d --- /dev/null +++ b/R/plot_classification.R @@ -0,0 +1,99 @@ +#' Visualize classification results for individual observations. +#' +#' @param x An object of class \code{'INCOMMON'} containing the classification results, as +#' produced by function `classify`. +#' @return An object or a list of objects of class \code{'ggplot2'}. +#' @export +#' @importFrom dplyr filter mutate rename select %>% +#' @examples +#' x = init(mutations = example_data$data, +#' sample = example_data$sample, +#' purity = example_data$purity, +#' tumor_type = example_data$tumor_type) +#' x = classify( +#' x = x, +#' priors = priors, +#' entropy_cutoff = 0.2, +#' rho = 0.01, +#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") +#' ) +#' plot_test(x = x, id = ids(x)[1]) +plot_classification = function(x, assembly = F){ + + stopifnot(inherits(x,"INCOMMON")) + + plots = lapply(ids(x), function(id){ + + # Get posterior of specific classified mutations in easy-to-plot format + toplot = posterior(x, id = id) + toplot = toplot %>% dplyr::group_by(NV) %>% dplyr::reframe(max = (value == max(value)), dplyr::across(dplyr::everything())) + + toplot$multiplicity = factor(toplot$multiplicity) + toplot$ploidy = factor(toplot$ploidy) + + highlight = toplot %>% dplyr::filter(max) + highlight = lapply(split(highlight, highlight$label), function(s){ + if(nrow(s) >= 20){ + stride = as.integer(nrow(s)/20) + s = s[seq(1,nrow(s),stride),] + } + s + }) %>% do.call(rbind, .) + + # Get scale factor for overlapped visualisation of posterior and entropy + scaleFactor = max(toplot$value)/max(toplot$entropy) + scaleFactor = ifelse(is.infinite(scaleFactor), 1, scaleFactor) + + plot = toplot %>% + ggplot() + + geom_line(aes(x = NV, y = value), + color = 'white', + size = .5, + alpha = 0)+ + geom_line( + aes(x = NV, y = entropy * scaleFactor), + color = "deeppink4", + size = 0.5, + linetype = "longdash", + alpha = .4 + ) + + geom_point(data = highlight, + aes(x = NV, + y = value, + color = ploidy, + shape = multiplicity + ), + size = 1) + + geom_line(data = highlight, + aes(x = NV, + y = value, + color = ploidy, + group = label + ), + size = .5) + + geom_vline( + xintercept = NV(x, id), + color = 'black', + linetype = 'dashed', + size = .5 + )+ + scale_size_manual(values = c("1"=.3,"2"=.3))+ + scale_y_continuous("Classification Probability", + sec.axis = sec_axis(~./scaleFactor, name = "Classification Entropy"), + limits = c(0,max(toplot$value)))+ + scale_color_manual(values = ploidy_colors)+ + CNAqc:::my_ggplot_theme() + + theme(axis.title.y.right = element_text(color="deeppink4"), + axis.text.y.right = element_text(color="deeppink4"))+ + guides(size = "none", + shape = guide_legend(title = 'Multiplicity', ncol = 1), + color = guide_legend(title = 'Ploidy', ncol = 2))+ + labs(title = paste0(info(x, id)$gene, ' (', info(x, id)$state, ')'), + subtitle = paste0(x$sample, ' (Purity = ', purity(x), '); ', info(x, id)$chr, '; ', info(x, id)$from, '; ', info(x, id)$ref, '>', info(x, id)$alt), + caption = paste0('Entropy cut-off: ', parameters(x)$entropy_cutoff, '; Overdispersion: ', parameters(x)$rho)) + + }) + if(assembly){ + ggpubr::ggarrange(plotlist = plots, ncol = 4) + } else plots +} \ No newline at end of file diff --git a/R/plot_fit.R b/R/plot_fit.R deleted file mode 100644 index f646d0f..0000000 --- a/R/plot_fit.R +++ /dev/null @@ -1,65 +0,0 @@ -plot_fit = function(x, target_gene, sample_name, model) { - # if(x$classifier$model == "terzile") { - # x$data = x$data %>% - # dplyr::mutate(class = class_terzile) - # } else { - # x$data = x$data %>% - # dplyr::mutate(class = class_binom) - # } - - # Select sample - data = x$data$data %>% - cbind(x$classifier[[model]]$data) %>% - dplyr::filter(sample == sample_name) - - # Get class of mutations on target gene - gene_status = data %>% - dplyr::filter(gene == target_gene) %>% - dplyr::mutate(class = paste0(class, ' (', nv, '/', dp, ')')) %>% - dplyr::pull(class) %>% - paste(collapse = ', ') - - # Get VAF of mutations on target gene - gene_vafs = data %>% - dplyr::filter(gene == target_gene) %>% - dplyr::pull(VAF) - - # Fit plot - colormap = ggsci::pal_jama("default")(7)[1:3] - names(colormap) = c("Clonal", "Clonal LOH", "Subclonal") - fit_plot = data %>% - ggplot2::ggplot() + - CNAqc:::my_ggplot_theme() + - ggplot2::geom_histogram(binwidth = 0.01, aes(VAF, fill = class)) + - ggplot2::scale_fill_manual(values = colormap)+ - # ggsci::scale_fill_jama() + - xlim(0, 1) + - ggplot2::labs( - title = sample_name, - subtitle = paste0("Purity: ", filter(x$data$purity, sample == sample_name)$purity, " - ", target_gene, ": ", gene_status) - ) + - ggplot2::guides(fill = guide_legend(''), - color = guide_legend('', override.aes = aes(fill = NA))) + - ggplot2::geom_vline(aes(xintercept = gene_vafs, - color = "Target VAF"), - linetype = 6) + - ggplot2::geom_vline( - aes(xintercept = filter(x$data$purity, sample == sample_name)$purity, - color = "Purity"), - linetype = 'dashed', - size = .6 - ) + - ggplot2::geom_vline(aes(xintercept = filter(x$data$purity, sample == sample_name)$purity / 2, - color = "Clonal peak"), - linetype = 'dashed') + - ggplot2::scale_color_manual( - name = "", - values = c( - `Target VAF` = "indianred3", - `Purity` = "black", - `Clonal Peak` = "gray" - ) - ) - - return(fit_plot) -} \ No newline at end of file diff --git a/R/plot_heatmap.R b/R/plot_heatmap.R deleted file mode 100644 index 1d1ba54..0000000 --- a/R/plot_heatmap.R +++ /dev/null @@ -1,159 +0,0 @@ -#'Plotting function for class \code{'TAPACLOTH'}. -#' @description -#' Generates a composite heatmap with useful information on data and classification results. -#' @param x An obj of class \code{'TAPACLOTH'} containing a `classifier`. -#' @param model Model used for the test, either "Binomial" or "Beta-Binomial". -#' @import pheatmap -#' @import CNAqc -#' @import ggplot2 -#' @import ggsci -#' @import ggridges -#' @importFrom dplyr filter mutate rename select %>% -#' @return An object of class \code{'TAPACLOTH'} containing a list of ggplot2 -#' plots named `plot_test` inside `classifier`. -#' @export -#' @examples -#' x = init(mutations = example_data$data, sample = example_data$sample, purity = example_data$purity) -#' x = run_classifier(x) -#' plot_heatmap(x) -plot_heatmap = function(x, model = 'binomial') -{ - stopifnot(inherits(x, "TAPACLOTH")) - - model = model %>% tolower() - - # Tibble 2 matrix - t2m = function(x){ - m = x[, 2:ncol(x)] %>% as.matrix() - colnames(m) = names(x)[2:length(names(x))] - rownames(m) = x$label - m - } - - # cli::cli_alert("Plotting model pheatmap") - - input_classes = x %>% - TAPACLOTH:::get_classifier(model = model) %>% - TAPACLOTH:::get_data() %>% - dplyr::mutate( - label = paste0(gene, " (", from, ") ", ref, '>', alt), - label_k = paste(ploidy, ' (', multiplicity, ')', sep = '') - ) - - # Matrix of the classifications - input_classes_matrix = input_classes %>% - dplyr::mutate(outcome = ifelse(outcome, 1, 0)) %>% - dplyr::select(label, label_k, outcome) %>% - tidyr::pivot_wider(names_from = label_k, values_from = outcome) - - # Removed - # input_classes_matrix$other = 0 - # no_classes = apply(input_classes_matrix[2:ncol(input_classes_matrix)], 1, function(x) all(x == 0)) - # input_classes_matrix$other[no_classes] = 1 - - # Idiotic formats - matrix_outcomes = input_classes_matrix %>% t2m() - - # p-values - pvalue_matrix = input_classes %>% - dplyr::select(label, label_k, pvalue) %>% - tidyr::pivot_wider(names_from = label_k, values_from = pvalue) - - p_matrix = pvalue_matrix %>% t2m - - # Nicola - I don't understand why I have to use <, shouldn't it be >= to remove non-significant ones? - alpha_test = TAPACLOTH::get_params(x) %>% - dplyr::filter(model == !!model) %>% - pull(alpha) - - p_matrix[p_matrix > 1-alpha_test] = NA - - p_matrix = apply(p_matrix, c(1, 2), function(x) { - if (is.na(x)) return('') - if (x < alpha_test*0.001) return("***") - if (x < alpha_test*0.01) return("**") - return("*") - }) - - # Read counts data - df_counts = input_classes %>% - dplyr::select(label, NV, DP, VAF) %>% - dplyr::distinct() %>% - data.frame() - - rownames(df_counts) = df_counts$label - df_counts = df_counts[, 2:ncol(df_counts)] - - # Roles - roles_all = input_classes$gene_role %>% unique() - roles_all = strsplit(roles_all, ',') %>% unlist %>% unique - roles_all_others = roles_all[!(roles_all %in% c("TSG", "oncogene", NA))] - - df_roles = input_classes %>% - dplyr::select(label, gene_role) %>% - dplyr::rename(role = gene_role) %>% - dplyr::distinct() %>% - rowwise() %>% - mutate( - TSG = grepl('TSG', role), - oncogene = grepl('oncogene', role), - other = sapply(roles_all_others, function(x) - grepl(x, role)) %>% any, - unknown = is.na(role) - ) %>% - dplyr::select(-role) %>% - as.data.frame() - - rownames(df_roles) = df_roles$label - df_roles = df_roles[,-1] - - # df_roles = apply(df_roles, 2, function(x) ifelse(x, 1, 0)) %>% data.frame - df_roles = apply(df_roles, 2, function(x) - ifelse(x, "YES", "NO")) %>% data.frame - - # Annotation rows - annotation_rows = cbind(df_counts, df_roles) - - # I decided we do not show these - annotation_rows = annotation_rows[,!apply(annotation_rows, 2, function(x) - all(x == "NO"))] - annotation_rows = annotation_rows[, colnames(annotation_rows) != 'NV'] - annotation_rows = annotation_rows[, colnames(annotation_rows) != 'other'] - - presence_annotation = 'darkgray' - onco_color = 'orange' - tsg_color = 'steelblue' - - annotation_colors = list( - TSG = c(`NO` = "white", `YES` = tsg_color), - oncogene = c(`NO` = "white", `YES` = onco_color), - other = c(`NO` = "white", `YES` = presence_annotation), - unknown = c(`NO` = "white", `YES` = presence_annotation), - # DP = RColorBrewer::brewer.pal(10, 'Purples'), - DP = viridis::viridis(10, direction = -1), - NV = RColorBrewer::brewer.pal(8, 'Purples'), - # VAF = RColorBrewer::brewer.pal(10, 'Spectral') - VAF = viridis::viridis(10, direction = -1) - ) - - pheatmap::pheatmap( - mat = matrix_outcomes, - main = x$sample, - cluster_cols = FALSE, - fontsize_row = 6, - fontsize = 7, - cellwidth = 20, - cellheight = 10, - display_numbers = p_matrix, - number_color = 'white', - # border_color = NA - gaps_col = c(1, 2, 4, 6, 8), - breaks = c(-1, 0, 1), - color = c(`0` = ggplot2::alpha('gainsboro', .5), `1` = 'indianred3'), - legend_breaks = c(0, 1), - legend_labels = c(`0` = "Reject H0", `1` = "Fail to\nReject H0"), - annotation_row = annotation_rows, - annotation_colors = annotation_colors - ) - -} diff --git a/R/plot_test.R b/R/plot_test.R deleted file mode 100644 index e030fcd..0000000 --- a/R/plot_test.R +++ /dev/null @@ -1,169 +0,0 @@ -#' Visualize classification results for individual observations. -#' -#' @param x An object of class \code{'TAPACLOTH'} containing the classification results, as -#' produced by function `run_classifier`. -#' @return An object of class \code{'ggplot2'}. -#' @export -#' @importFrom dplyr filter mutate rename select %>% -#' @examples -#' x = init(mutations = example_data$data, -#' sample = example_data$sample, -#' purity = example_data$purity) -#' model = "beta-binomial" -#' x = run_classifier( -#' x = x, -#' model = model, -#' rho = 0.01, -#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") -#' ) -#' plot_test(x = x,id = x$classifier[[model]]data$id[1],model = model) -plot_test = function(x, assembly = F){ - - stopifnot(inherits(x,"TAPACLOTH")) - - colors = CNAqc:::get_karyotypes_colors(c('1:0', '1:1', '2:0', '2:1', '2:2')) - colors = colors[c("1:0","1:1","2:1","2:2")] - names(colors) = sapply(names(colors), function(n){ - strsplit(n,split = ":")[[1]] %>% as.integer() %>% sum() - }) - - # names_colors = expand.grid(names(colors), 1:2) %>% apply(1, paste, collapse = ' ') - # colors = sapply(names_colors, function(n) - # colors[strsplit(n, ' ')[[1]][1]]) - # names(colors) = sapply(names_colors, function(n){ - # splitted = strsplit(n, ' ')[[1]] - # k = splitted[1] - # m = splitted[2] - # paste0(strsplit(k, ':')[[1]] %>% as.integer() %>% sum(),"N (Mutated: ", m,"N)") - # }) - - colors = c(colors, "out of sample" = 'gray') - - cutoff = x %>% get_params() %>% pull(cutoff) - - plots = lapply(x$classifier$data$id, function(id){ - - # uncertainty = round(x %>% get_classifier() %>% get_data() %>% - # filter(id == !!id) %>% pull(uncertainty),2) - - - mdata = x %>% get_classifier(model) %>% - get_data() %>% - filter(id == !!id) - - # dataset = pull(mdata, density)[[1]] %>% - # group_by(NV) %>% - # summarise( - # density, - # Major, - # minor, - # multiplicity, - # karyotype, - # label, - # peak, - # cutoff, - # uncertainty = 1 - max(density / sum(density)) - # ) %>% - # ungroup() - - dataset = pull(mdata, density)[[1]] - - dataset = dataset %>% - group_by(NV) %>% - reframe(entropy = 1-max(density)/sum(density), density, ploidy, multiplicity, peak, cutoff, label, p_assign) - - dataset = lapply(dataset$label %>% unique(), function(l){ - d = dataset %>% - filter(label==l) - n_points = 250 - stride = max(round(nrow(d)/n_points),1) - d[seq(1,nrow(d),stride),] - }) %>% - do.call(rbind, .) - - label = mdata$label - - scaleFactor = max(dataset$density)/max(dataset$entropy) - scaleFactor = ifelse(is.infinite(scaleFactor), 1, scaleFactor) - - dataset$multiplicity = factor(dataset$multiplicity) - dataset %>% - mutate(ploidy = ifelse(label=="out of sample", label, ploidy)) - dataset$ploidy = factor(dataset$ploidy) - - dataset %>% - ggplot() + - geom_line(aes(x = NV, y = density), color = 'white', size = .5, alpha = 0)+ - geom_line( - aes(x = NV, y = entropy * scaleFactor), - color = "deeppink4", - size = 0.5, - linetype = "longdash", - alpha = .4 - ) + - CNAqc:::my_ggplot_theme() + - theme(axis.title.y.right = element_text(color="deeppink4"), - axis.text.y.right = element_text(color="deeppink4"))+ - geom_point(data = dataset %>% maximise(), - aes(x = NV, - y = density, - color = ploidy, - shape = multiplicity - ), - size = 1) + - scale_color_manual(values = colors, breaks = dataset$ploidy %>% unique(), name = "Ploidy")+ - scale_shape_manual(values = data_frame("1"=16,"2"=17), name = "Multiplicity")+ - geom_line(data = dataset %>% maximise(), - aes(x = NV, - y = density, - color = ploidy, - size = multiplicity))+ - scale_size_manual(values = c("1"=.3,"2"=.3))+ - scale_y_continuous("Likelihood", - sec.axis = sec_axis(~./scaleFactor, name = "Entropy"), - limits = c(0,max(dataset$density)))+ - guides(size = "none")+ - labs( - title = paste0( - mdata$from, - "; ", - mdata$ref, - ">", - mdata$alt, - "; ", - mdata$gene, - " (", - mdata$gene_role, - ")" - ), - subtitle = paste0( - "Classification: ", - label, - "\nSample: ", - get_sample(x), - "; Purity: ", - get_purity(x) - ), - caption = bquote(~alpha*" = "*.(cutoff)~ - "; Entropy: "*.(round(mdata$entropy,2))~ - " (mean: "*.(round(mdata$mean_entropy,2))* - ")") - ) + - # guides(color = 'none') + - # geom_hline(data = dataset %>% group_by(label) %>% summarise(cutoff) %>% unique() %>% filter(label != "out of sample"), - # mapping = aes(yintercept = cutoff, - # color = label), - # linetype = 'dashed', - # size = .5 - # )+ - geom_vline( - xintercept = get_NV(x, id), - color = 'black', - linetype = 'dashed', - size = .5 - ) - }) - if(assembly){ - ggpubr::ggarrange(plotlist = plots, ncol = 4) - } else plots -} \ No newline at end of file diff --git a/R/plot_test_power.R b/R/plot_test_power.R deleted file mode 100644 index b0745a1..0000000 --- a/R/plot_test_power.R +++ /dev/null @@ -1,242 +0,0 @@ -plot_test_power = function(null_model) - -{ - inputs = null_model$density - coverage = null_model$coverage - p = null_model$purity/2 - alpha_level = null_model$alpha_level - nvs = null_model$density$nv %>% as.vector() - l_a = null_model$nv[1] - r_a = null_model$nv[2] - l_v = null_model$vaf[1] - r_v = null_model$vaf[2] - - model_string = "Binomial" - if(null_model$model == 'Beta-Binomial') - model_string = paste0("Beta-Binomial (rho = ", null_model$rho, ')') - - col_loh = 'steelblue' - col_subclonal = 'indianred3' - col_clonal = 'forestgreen' - - ggplot2::ggplot() + - CNAqc:::my_ggplot_theme() + - ggplot2::geom_rect( - data = data.frame( - xmin = 0, - xmax = nvs[l_a], - ymin = 0, - ymax = 1 - ), - aes( - xmin = xmin, - xmax = xmax, - ymin = ymin, - ymax = ymax - ), - fill = col_subclonal, - alpha = .2 - ) + - ggplot2::geom_rect( - data = data.frame( - xmax = coverage, - xmin = nvs[r_a], - ymin = 0, - ymax = 1 - ), - aes( - xmin = xmin, - xmax = xmax, - ymin = ymin, - ymax = ymax - ), - fill = col_loh, - alpha = .2 - ) + - ggplot2::geom_rect( - data = data.frame( - xmax = nvs[l_a], - xmin = nvs[r_a], - ymin = 0, - ymax = 1 - ), - aes( - xmin = xmin, - xmax = xmax, - ymin = ymin, - ymax = ymax - ), - fill = col_clonal, - alpha = .2 - ) + - ggplot2::geom_point( - data = inputs %>% filter(nv < nvs[l_a]), - aes(x = nv, y = p), - size = .6, - color = col_subclonal - ) + - ggplot2::geom_point( - data = inputs %>% filter(nv > nvs[r_a]), - aes(x = nv, y = p), - size = .6, - color = col_loh - ) + - ggplot2::geom_point( - data = inputs %>% filter(nv >= nvs[l_a], nv <= nvs[r_a]), - aes(x = nv, y = p), - size = 1, - color = col_clonal - ) + - ggplot2::geom_point( - data = inputs, - aes(x = nv, y = VAF), - size = .6, - shape = 3, - color = 'gray' - ) + - ggplot2::geom_point( - data = inputs %>% filter(nv == nvs[l_a]), - aes(x = nv, y = VAF), - size = 3, - color = col_subclonal - ) + - ggplot2::geom_segment( - data = inputs %>% filter(nv == nvs[l_a]), - aes( - x = nvs[l_a], - y = VAF, - xend = coverage, - yend = VAF - ), - linetype = 'dashed', - color = col_subclonal - ) + - ggplot2::geom_segment( - data = inputs %>% filter(nv == nvs[l_a]), - aes( - x = nvs[l_a], - y = VAF, - xend = nvs[l_a], - yend = 0 - ), - linetype = 'dashed', - color = col_subclonal - ) + - ggplot2::geom_point( - data = inputs %>% filter(nv == nvs[r_a]), - aes(x = nv, y = VAF), - size = 3, - color = col_loh - ) + - ggplot2::geom_segment( - data = inputs %>% filter(nv == nvs[r_a]), - aes( - x = nvs[r_a], - y = VAF, - xend = coverage, - yend = VAF - ), - linetype = 'dashed', - color = col_loh - ) + - ggplot2::geom_segment( - data = inputs %>% filter(nv == nvs[r_a]), - aes( - x = nvs[r_a], - y = VAF, - xend = nvs[r_a], - yend = 1 - ), - linetype = 'dashed', - color = col_loh - ) + - ggplot2::geom_text( - x = nvs[l_a] - 5, - y = 1, - label = 'subclonal', - hjust = 1, - size = 3, - color = col_subclonal - ) + - ggplot2::geom_text( - x = nvs[r_a] + 5, - y = 0, - label = 'clonal LOH', - hjust = 0, - size = 3, - color = col_loh - ) + - ggplot2::labs( - x = 'NV', - y = "1 - P(X > NV)", - caption = model_string, - title = paste0("Coverage ", coverage, ' with purity ', p*2), - subtitle = paste0('Alpha-level: ', alpha_level) - ) + - ggplot2::coord_cartesian(clip = 'off') + - ggplot2::scale_y_continuous(sec.axis = sec_axis(~ ., name = 'VAF | coverage')) + - ggplot2::theme(axis.line.y.right = element_line(color = "gray")) + - ggplot2::annotation_custom( - grid::textGrob(paste('>', round(r_v, 2)), gp = grid::gpar(col = col_loh, fontsize = 8)), - xmin = coverage, - xmax = coverage, - ymin = r_v + .03, - ymax = r_v + .03 - ) + - ggplot2::annotation_custom( - grid::textGrob( - paste('<', round(l_v, 2)), - gp = grid::gpar(col = col_subclonal, fontsize = 8) - ), - xmin = coverage, - xmax = coverage, - ymin = l_v - .03, - ymax = l_v - .03 - ) + - ggplot2::annotate( - "text", - x = 0, - y = .95, - label = "Subclonal", - hjust = 0, - size = 3, - color = col_subclonal - ) + - ggplot2::annotate( - "text", - x = coverage, - y = .95, - label = "Clonal LOH", - hjust = 1, - size = 3, - color = col_loh - ) + - ggplot2::annotate( - "text", - x = coverage * p, - y = .95, - label = "Clonal", - hjust = .5, - size = 3, - color = col_clonal - ) -} - -example_cartoons = function() -{ - ggpubr::ggarrange( - plot_test_power(140, .3), - plot_test_power(140, .3, model = 'betabinomial', rho = 0.001), - plot_test_power(140, .3, model = 'betabinomial', rho = 0.01), - ncol = 1, - nrow = 3 - ) - - ggpubr::ggarrange( - plot_test_power(700, .4), - plot_test_power(700, .4, model = 'betabinomial', rho = 0.001), - plot_test_power(700, .4, model = 'betabinomial', rho = 0.01), - ncol = 1, - nrow = 3 - ) -} diff --git a/R/print.INCOMMON.R b/R/print.INCOMMON.R new file mode 100644 index 0000000..396cd0b --- /dev/null +++ b/R/print.INCOMMON.R @@ -0,0 +1,78 @@ +#' Plot for class \code{'INCOMMON'}. +#' + +#' @description +#' +#' If the terzile model is used, the default plot is a histogram of the VAF spectrum is plotted, +#' with expected clonal peak, purity and VAF of mutations affecting the target gene +#' highlighted. If the Binomial or Beta-Binomial model is used, it includes +#' a plot showing details of the used statistical test in the bottom panel. +#' +#' @param x An obj of class \code{'INCOMMON'}. +#' @param... Default S3 method parameter. +#' +#' @return Nothing. +#' +#' @export +#' +#' @examples +#' input = init(mutations = example_data$data[1,], sample = example_data$sample, purity = example_data$purity) +#' x = run_classifier(input, alpha_level = 1e-3, model = "Binomial") +#' x = plot_test(x) +#' plot.INCOMMON(x) + +plot.INCOMMON = function(x, ...) { + stopifnot(inherits(x, "INCOMMON")) + lapply(names(x$classifier), function(model){ + lapply(x$classifier[[model]]$data$id, function(id){ + plot_test(x, id = id, model = model) + }) + }) +} +#' Print for class \code{'INCOMMON'}. +#' @param x An obj of class \code{'INCOMMON'}. +#' @param ... Default S3 method parameter. +#' +#' @return Nothing. +#' @import cli +#' @import crayon +#' @importFrom dplyr filter mutate rename select %>% +#' @export +#' +#' @examples +#' x = init(mutations = example_data$data, +#' sample = example_data$sample, +#' purity = example_data$purity, +#' tumor_type = example_data$tumor_type) +#' x = classify( +#' x = x, +#' priors = priors, +#' entropy_cutoff = 0.2, +#' rho = 0.01, +#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") +#' ) +#' print(x) +print.INCOMMON = function(x, ...) { + stopifnot(inherits(x, "INCOMMON")) + ## Print input data + cli::cli_rule( + paste( + crayon::bgMagenta(crayon::black("[ INCOMMON ] ")), + 'Sample {.field {x$sample}} ({.field {x$tumor_type}}), with purity {.field {x$purity}}' + ) + ) + + if ("fit" %in% names(x)) { + + cli::cli_rule( + paste( + crayon::bgMagenta(crayon::black("[ INCOMMON ] ")), + 'Classified mutations using Beta-binomial model', + 'with overdispersion parameter {.field {parameters(x)$rho}} and entropy cutoff {.field {parameters(x)$entropy_cutoff}}', + '' + )) + print(classification(x)) + } else{ + data(x) + } +} diff --git a/R/print.tapacloth.R b/R/print.tapacloth.R deleted file mode 100644 index be0c756..0000000 --- a/R/print.tapacloth.R +++ /dev/null @@ -1,100 +0,0 @@ -#' Plot for class \code{'TAPACLOTH'}. -#' - -#' @description -#' -#' If the terzile model is used, the default plot is a histogram of the VAF spectrum is plotted, -#' with expected clonal peak, purity and VAF of mutations affecting the target gene -#' highlighted. If the Binomial or Beta-Binomial model is used, it includes -#' a plot showing details of the used statistical test in the bottom panel. -#' -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param... Default S3 method parameter. -#' -#' @return Nothing. -#' -#' @export -#' -#' @examples -#' input = init(mutations = example_data$data[1,], sample = example_data$sample, purity = example_data$purity) -#' x = run_classifier(input, alpha_level = 1e-3, model = "Binomial") -#' x = plot_test(x) -#' plot.TAPACLOTH(x) - -plot.TAPACLOTH = function(x, ...) { - stopifnot(inherits(x, "TAPACLOTH")) - lapply(names(x$classifier), function(model){ - lapply(x$classifier[[model]]$data$id, function(id){ - plot_test(x, id = id, model = model) - }) - }) -} -#' Print for class \code{'TAPACLOTH'}. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param ... Default S3 method parameter. -#' -#' @return Nothing. -#' @import cli -#' @import crayon -#' @importFrom dplyr filter mutate rename select %>% -#' @export -#' -#' @examples -#' x = init(mutations = example_data$data, -#' sample = example_data$sample, -#' purity = example_data$purity) -#' x = run_classifier( -#' x, -#' alpha_level = 1e-3, -#' model = "Binomial") -#' x = estimate_purity(x = x, model = "binomial", eps = 0.01) -#' print(x) -print.TAPACLOTH = function(x, ...) { - stopifnot(inherits(x, "TAPACLOTH")) - ## Print input data - cli::cli_rule( - paste( - crayon::bgMagenta(crayon::black("[ TAPACLOTH ] ")), - 'Input data for sample {.field {x$sample}}, with purity {.field {x$purity}}' - ) - ) - - print(x$data) - - if ("purity_estimate" %in% names(x)){ - cli::cli_rule( - paste( - crayon::bgMagenta(crayon::black("[ TAPACLOTH ] ")), - 'Purity estimate using ', - crayon::bgYellow(crayon::black("[ BMix ] ")), - 'with Beta-binomial model' - ) - ) - print(tibble( - purity = get_purity(x), - reliability = get_reliability(x), - purity_bmix = get_purity_bmix(x), - )) - - cat("\n") - } - - if ("classifier" %in% names(x)) { - - cli::cli_rule( - paste( - crayon::bgMagenta(crayon::black("[ TAPACLOTH ] ")), - 'Test using Beta-binomial model', - 'with overdispersion parameter {.field {x$classifier$params$rho}} and likelihood cutoff {.field {x$classifier$params$cutoff}}', - '' - )) - print(get_classifier(x) %>% - get_data() %>% - dplyr::select(id, NV, DP, VAF, - gene, gene_role, - state, label, - ploidy, multiplicity, - entropy) - ) - } -} diff --git a/R/purity_from_fit.R b/R/purity_from_fit.R deleted file mode 100644 index 2a7be84..0000000 --- a/R/purity_from_fit.R +++ /dev/null @@ -1,20 +0,0 @@ -purity_from_fit = function(n_binomials, peaks, purity, eps) { - out = NA - if (n_binomials == 3) - out = peaks[2] * 2 - if (n_binomials == 1) - out = peaks[1] * 2 - if (n_binomials == 2) { - purity_hyp = c(min(peaks[1] * 2, 1), min(peaks[2] * 2, 1)) # in this case we have two equivalent putative purities - if (!is.na(purity) & - min(abs(purity_hyp - purity)) <= eps) - # check if at least one is close to the input one - out = purity_hyp[which.min(abs(purity_hyp - purity))] # choose the estimate that is closest to input - if (is.na(out)) { - cli::cli_alert( - "Purity could not be estimated reliably. Check results of the fit in the output figure" - ) - } - } - return(out) -} \ No newline at end of file diff --git a/R/run_classifier.R b/R/run_classifier.R deleted file mode 100644 index a40cd06..0000000 --- a/R/run_classifier.R +++ /dev/null @@ -1,103 +0,0 @@ -#' Classify mutations using a (Beta-)Binomial model-based test. -#' -#' @param x An object of class \code{'TAPACLOTH'} generated with function `init`. -#' @param cutoff Likelihood cut-off for class assignment. -#' @param model Model used for the classification task, either "Binomial" (no over-dispersion) -#' or "Beta-Binomial" (over-dispersion included), that will be used as the expected -#' distribution for the number of reads with variant at fixed coverage and purity. -#' @param rho If "Beta-Binomial" model is selected, this parameter tunes the over-dispersion -#' of the expected distribution used for the test. -#' @param karyotypes Karyotypes to be included among the possible classes. -#' @return An object of class `TAPACLOTH` containing the input plus -#' the classification data and paramters. -#' @export -#' @importFrom dplyr filter mutate rename select %>% -#' @examples -#' x = init(mutations = example_data$data, -#' sample = example_data$sample, -#' purity = example_data$purity) -#' x = run_classifier( -#' x = x, -#' model = "Beta-Binomial", -#' rho = 0.01, -#' karyotypes = c("1:0","1:1","2:0","2:1","2:2") -#' ) -#' print(x) -run_classifier = function(x, - cutoff = 0.75, - rho = 0.01, - karyotypes = c("1:0","1:1","2:0","2:1","2:2"), - priors = NULL, - gene_role_specific = FALSE, - assign_extremes = FALSE - ) - { - stopifnot(inherits(x, "TAPACLOTH")) - - # Output - test = x - if (!("classifier" %in% names(test))) - test$classifier = list() - - cli::cli_h1( - "TAPACLOTH clonality/Zygosity testing for sample {.field {x$sample}}" - ) - cat("\n") - - check_input(x) - - cli::cli_alert_info("Computing likelihoods and uncertainties.") - - x = idify(x) - - tests = lapply(x$data$id, function(id) { - if(x$data %>% - dplyr::filter(id==!!id) %>% - nrow() > 1){ - cli_alert_warning(text = "More than one mutation mapped at: {.field {id}}") - x$data %>% - dplyr::filter(id==!!id) - cli_alert_warning(text = "Keeping first one (consider going back to input data)") - w = which(x$data$id==id) - x$data = x$data[-w[2:length(w)],] - - } - # If classifying with a bias towards gene role, adapt karyotypes list accordingly. - k_reduced = karyotypes - if(gene_role_specific & !(x %>% get_gene_role(id) %>% is.na)){ - if(x %>% get_gene_role(id) == "TSG") k_reduced = c("1:0","1:1","2:0") - if(x %>% get_gene_role(id) == "oncogene") k_reduced = c("1:1","2:1","2:2") - } - - # Compute likelihoods - binomial_test( - test = get_NV(x, id), - DP = get_DP(x, id), - gene = get_gene(x, id), - priors = priors, - purity = get_purity(x), - cutoff = cutoff, - rho = rho, - karyotypes = k_reduced, - assign_extremes - ) - }) %>% - do.call(rbind, .) - - - test$classifier = list( - params = tibble(cutoff = cutoff, - rho = rho), - data = bind_cols(x %>% get_data(), tests) - ) - - test$classifier$data = test %>% - get_classifier() %>% - get_data() %>% - reduce_classes() %>% - add_per_state_probabilities() - - return(test) -} - - diff --git a/R/run_test.R b/R/run_test.R deleted file mode 100644 index 64156bf..0000000 --- a/R/run_test.R +++ /dev/null @@ -1,5 +0,0 @@ -run_test = function(nv, null_model){ - if(nv > null_model$nv[2]) return("Clonal LOH") - if(nv > null_model$nv[1]) return("Clonal") - return("Subclonal") - } diff --git a/R/test_setup.R b/R/test_setup.R deleted file mode 100644 index ddc5292..0000000 --- a/R/test_setup.R +++ /dev/null @@ -1,122 +0,0 @@ -#' Compute parameters and distribution for null hypothesis. -#' -#' @param coverage The sequencing coverage at the locus of mutation. -#' @param purity Sample purity. -#' @param rho The overdisperion parameter for "Beta-Binomial" model. -#' @param alpha_level The confidence level of the test. -#' @param model Model used for the test, either "Binomial" or "Beta-Binomial". -#' @return A list. -#' @export -#' @importFrom dplyr filter mutate rename select %>% -#' @import VGAM -#' @import stringr -#' @examples -#' null_model = test_setup(coverage = 500, purity = 1.0, rho = 0.01, alpha_level = 0.01, model = 'binomial') -#' print(null_model) -test_setup = function(coverage = 500, - purity = 1.0, - rho = 0.01, - threshold = 0.1, - model = 'binomial', - karyotypes = c("1:0", "1:1", "2:0", "2:1", "2:2") -) -{ - - # Range of NV values - nvs = 1:coverage - - # Compute Binomial or Beta-Binomial probability for NV values in range - log_p = NULL - # p = purity/2 - # if ((model %>% tolower()) == 'binomial') - # { - # log_p = sapply(nvs, dbinom, size = coverage, prob = p) - # } - # else - # { - # log_p = sapply( - # nvs, - # VGAM::dbetabinom, - # size = coverage, - # prob = p, - # rho = rho - # ) - # } - - log_p = lapply(karyotypes, function(karyotype) { - alleles = stringr::str_split(string = karyotype, pattern = ":") %>% unlist() %>% as.integer() - ploidy = sum(alleles) - lapply(1:max(alleles), function(mult) { - if ((model %>% tolower()) == 'binomial') { - log_p = sapply(nvs, - dbinom, - size = coverage, - prob = mult * purity / (2 * (1 - purity) + purity * ploidy)) - } - else { - log_p = sapply( - nvs, - VGAM::dbetabinom, - size = coverage, - prob = mult * purity / (2 * (1 - purity) + purity * ploidy), - rho = rho - ) - } - - # Compute P(X > NV) for each NV value - # p_x = cumsum(log_p) - # Find l_a such that P(X <= l_a) < alpha - # l_a = which(p_x < alpha_level, arr.ind = TRUE) %>% max - # - # # Find r_a such that P(X > r_a) < 1 - alpha - # r_a = which(p_x > 1 - alpha_level, arr.ind = TRUE) %>% min - - # Compute two-tailed p-value for each NV value - p_x = sapply(nvs, function(nv){log_p[which(log_p <= log_p[nv])] %>% sum()}) - # p_x = 1-p_x - # p_x = log_p - - # Find left extremum NV l_a such that p-value < alpha - l_a = which(p_x > threshold, arr.ind = TRUE) %>% min - - # Find right extremum NV l_a such that p-value < alpha - r_a = which(p_x > threshold, arr.ind = TRUE) %>% max - - # Adjustments for plots when test fails - if (is.infinite(l_a)) - l_a = 1 - if (is.infinite(r_a)) - r_a = coverage - - # Translate NV cutoffs in VAF space - - vafs = nvs / coverage - l_v = vafs[l_a] - r_v = vafs[r_a] - - return(tibble( - karyotype = karyotype, - multiplicity = mult, - inputs = list(tibble(nv = nvs, - p = p_x, - VAF = vafs)), - l_a = l_a, - r_a = r_a, - l_v = l_v, - r_v = r_v - )) - }) %>% do.call(rbind, .) - }) %>% do.call(rbind, .) - - - # cli::cli_alert_info("Computing p-values.") - - return(list( - model = model, - rho = rho, - coverage = coverage, - purity = purity, - threshold = threshold, - test = log_p - )) -} diff --git a/R/utils.R b/R/utils.R index 38e1fb2..5314923 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,157 +1,112 @@ -get_data = function(x){ +data = function(x){ return(x$data) } -get_sample = function(x){ +samples = function(x){ return(x$sample) } -get_purity = function(x){ +purity = function(x){ return(unique(x$purity)) } -#' Getter for class \code{'TAPACLOTH'}. +tumor_type = function(x){ + return(unique(x$tumor_type)) +} + + +#' Getter for class \code{'INCOMMON'}. #' @description -#' Get significant classification data for the specified model, if already tested. -#' @param x An obj of class \code{'TAPACLOTH'}. +#' Get classification data for specific selected model. +#' @param x An obj of class \code{'INCOMMON'}. #' @param model Model used in the test from which to get classification data. -#' @return A tibble. +#' @return A tibble with classification data. #' @export -get_classes = function(x, model){ - stopifnot(inherits(x, "TAPACLOTH")) - y = x$classifier[[model]]$data %>% - dplyr::select(chr, from, to, ref, alt, karyotype, multiplicity, outcome) - return(y) +classification = function(x) { + stopifnot(inherits(x, "INCOMMON")) + stopifnot("fit" %in% names(x)) + stopifnot("classification" %in% names(x$fit)) + x$fit$classification } -#' Getter for class \code{'TAPACLOTH'}. +#' Getter for class \code{'INCOMMON'}. #' @description #' Get model parameters of the performed classification tests. -#' @param x An obj of class \code{'TAPACLOTH'}. +#' @param x An obj of class \code{'INCOMMON'}. #' @return A tibble containing parameters for all the models used in the classification. #' @export #' -get_params = function(x) { - stopifnot(inherits(x, "TAPACLOTH")) - x$classifier$params +parameters = function(x) { + stopifnot(inherits(x, "INCOMMON")) + stopifnot("fit" %in% names(x)) + stopifnot("params" %in% names(x$fit)) + x$fit$params } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get classification data for a specific gene. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param gene_id The name of the gene. -#' @return A tibble with gene-specific classification data. -#' @export -gene_label = function(x, gene_id){ - stopifnot(inherits(x, "TAPACLOTH")) - x = idify(x) - y = x$classifier$data %>% - dplyr::filter(gene == gene_id) - return(tibble(id = y$id, - label = y$label)) -} -#' Getter for class \code{'TAPACLOTH'}. +#' Getter for class \code{'INCOMMON'}. #' @description -#' Get point and mean classification entropy data for a specific gene. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param gene_id The name of the gene. -#' @return A tibble with gene-specific classification data. +#' Get model parameters of the performed classification tests. +#' @param x An obj of class \code{'INCOMMON'}. +#' @return A tibble containing parameters for all the models used in the classification. #' @export -gene_entropy = function(x, gene_id){ - stopifnot(inherits(x, "TAPACLOTH")) - x = idify(x) - y = x$classifier$data %>% - dplyr::filter(gene == gene_id) - return(tibble(id = y$id, - entropy = y$entropy, - mean_entropy = y$mean_entropy)) +#' +posterior = function(x, id) { + stopifnot(inherits(x, "INCOMMON")) + stopifnot("fit" %in% names(x)) + stopifnot("posterior" %in% names(x$fit)) + stopifnot(id %in% names(x$fit$posterior)) + x$fit$posterior[[id]] } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get classification data for specific selected model. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param model Model used in the test from which to get classification data. -#' @return A tibble with classification data. -#' @export -get_classifier = function(x, model = NULL) { - stopifnot(inherits(x, "TAPACLOTH")) - stopifnot("classifier" %in% names(x)) - x$classifier - } - idify = function(x){ - y = get_data(x) - y = y %>% + x$data = data(x) %>% mutate(id = paste(chr,from,to,ref,alt,sep = ":")) - x$data = y return(x) } unidify = function(x){ - y = get_data(x) - y = y %>% + x$data = data(x) %>% dplyr::select(-id) - x$data = y return(x) } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get coverage for a specific mutation in the sample. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param mutation_id Coordinates of mutation in the form of a string -#' containing `chr`,`from`,`to`,`alt`,`ref` coordinates, colon separated. -#' @return DP of the mutation. -#' @export -get_DP = function(x, id){ +ids = function(x){ + if(!("id" %in% colnames(data(x)))) x = idify(x) + data(x) %>% pull(id) %>% unique() +} + +info = function(x, mutation_id){ + if(!("id" %in% colnames(data(x)))) x = idify(x) + out = data(x) %>% dplyr::filter(id == mutation_id) + if("fit" %in% names(x)) out = classification(x) %>% dplyr::filter(id == mutation_id) + out +} + + +DP = function(x, id){ x = idify(x) - x$data %>% + data(x) %>% dplyr::filter(id == !!id) %>% pull(DP) } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get number of reads with variant for a specific mutation in the sample. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param mutation_id Coordinates of mutation in the form of a string -#' containing `chr`,`from`,`to`,`alt`,`ref` coordinates, colon separated. -#' @return NV of the mutation. -#' @export -get_NV = function(x, id){ +NV = function(x, id){ x = idify(x) - x$data %>% + data(x) %>% dplyr::filter(id == !!id) %>% pull(NV) } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get variant allele frequency (VAF) for a specific mutation in the sample. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param mutation_id Coordinates of mutation in the form of a string -#' containing `chr`,`from`,`to`,`alt`,`ref` coordinates, colon separated. -#' @return VAF of the mutation. -#' @export -get_VAF = function(x, mutation_id){ + +VAF = function(x, mutation_id){ x = idify(x) - x$data %>% + data(x) %>% dplyr::filter(id == mutation_id) %>% pull(VAF) } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get ID of the gene affected by the specified mutation. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param mutation_id Coordinates of mutation in the form of a string -#' containing `chr`,`from`,`to`,`alt`,`ref` coordinates, colon separated. -#' @return A tibble with the name of the gene affected by the specified mutation. -#' @export -get_gene = function(x, mutation_id){ +gene = function(x, mutation_id){ x = idify(x) x$data %>% dplyr::filter(id == mutation_id) %>% @@ -165,47 +120,17 @@ get_gene_role = function(x, id){ pull(gene_role) } -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get coordinates of mutation(s) mapped on a specified gene. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param gene_name Name of the gene affected by the mutation. -#' @return A list of mutation coordinates. -#' @export -get_coord = function(x, gene_name){ - x %>% - idify() %>% - get_data() %>% - dplyr::filter(gene == gene_name) %>% - pull(id) %>% - strsplit(., split = ":") -} - -get_mass = function(x, null_model, id){ - y = null_model$test %>% - dplyr::select(karyotype, multiplicity, l_a, r_a) - - y$id = id - - y$mass = sapply(null_model$test$inputs, function(s) { - s$p[get_NV(x, id)] - }) - - y$gene = get_gene(x, id) - - return(y) -} -#' Plot function for class \code{'TAPACLOTH'}. +#' Plot function for class \code{'INCOMMON'}. #' @description #' Plot results of the classification under the specified model and for the specified gene. -#' @param x An obj of class \code{'TAPACLOTH'}. +#' @param x An obj of class \code{'INCOMMON'}. #' @param gene_name Name of gene affected by the mutation. #' @param model Model used for classification. #' @return A tibble with classification data for the specified mutation. #' @export plot_gene = function(x,model,gene_name){ - stopifnot(inherits(x, "TAPACLOTH")) + stopifnot(inherits(x, "INCOMMON")) model = model %>% tolower() ids = get_id(x,gene_name) lapply(ids, function(i){ @@ -213,130 +138,29 @@ plot_gene = function(x,model,gene_name){ }) } -#' Plot function for class \code{'TAPACLOTH'}. -#' @description -#' Plot results of the BMix fit used for purity estimation, under the specified model. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param model Model used for purity estimate -#' @return A plot of BMix fit. -#' @export -plot_bmix = function(x){ - y = x$purity_estimate$plot_bmix - return(y) -} - - -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Extract data and parameters of purity estimation. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param model Model used for purity estimation -#' @return Purity estimate data. -#' @export -get_purity_estimate = function(x){ - stopifnot(inherits(x, "TAPACLOTH")) - y = x$purity_estimate - return(y) -} - -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Extract the sample purity as estimated using the specified model -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param model Model used for purity estimation -#' @return Sample purity as obtained by BMix purity estimation procedure. -#' @export -get_purity_bmix = function(x){ - stopifnot(inherits(x, "TAPACLOTH")) - y = get_purity_estimate(x) - return(y$purity) -} - -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Extract reliability of input sample purity as compared to TAPACLOTH estimate using the specified model. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param model Model used for purity estimation -#' @return Reliability of input purity estimate. -#' @export -get_reliability = function(x){ - stopifnot(inherits(x, "TAPACLOTH")) - y = get_purity_estimate(x) - return(y$reliability) -} - -get_threshold = function(x, model){ - y = get_classifier(x, model) - y$params$threshold -} - -get_rho = function(x){ - y = get_classifier(x, model = "beta-binomial") - y$params$rho -} - -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' List models used for classification tests. -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @return Model names. -#' @export -models_avail = function(x){ - stopifnot(inherits(x, "TAPACLOTH")) - return(names(x$classifier)) -} - -get_ploidy = function(k){ - stringr::str_split(k, pattern = ":")[[1]] %>% as.integer() %>% sum()} - -#' Getter for class \code{'TAPACLOTH'}. -#' @description -#' Get ID of the mutation(s) affecting the specified gene -#' @param x An obj of class \code{'TAPACLOTH'}. -#' @param gene_name Name of the selected gene. -#' @return ID( of mutation(s). -#' @export -get_id = function(x, gene_name){ - x = idify(x) - x$data %>% - dplyr::filter(gene == gene_name) %>% - pull(id) -} - -closer_dist = function(null_model, nv, karyotypes) { - i = null_model$test %>% - rowwise() %>% - mutate(dist = min(nv - l_a, nv - r_a)) %>% - pull(dist) %>% - abs() %>% - which.min() - return(i) -} - -# Maxima -maximise = function(x) -{ - x %>% - group_by(NV) %>% - filter(density == max(density)) %>% - ungroup() -} - # Prior getter -get_prior = function(x, gene, label){ +get_prior = function(x, gene, tumor_type){ if(is.null(x)) { cli::cli_alert("No prior probabilities provided") return(1) - } else { - out = x %>% filter(gene == !!gene, label == !!label) %>% pull(p) - if(out %>% length() == 0){ - cli::cli_alert("No prior probabilitiy specified for {.field {gene}}") + } + + if(!(gene %in% x$gene)) { + cli::cli_alert("No prior probability specified for {.field {gene}}") return(1) } - else if (out %>% length() > 0) return(out) - } + + if(tumor_type %in% (x %>% dplyr::filter(gene == !!gene) %>% dplyr::pull(tumor_type))) { + out = x %>% dplyr::filter(gene == !!gene, tumor_type == !!tumor_type) + } else { + cli::cli_alert("No {.field {tumor_type}}-specific prior probability specified for {.field {gene}}") + cli::cli_alert("Using a pan-cancer prior") + out = x %>% dplyr::filter(gene == !!gene, tumor_type == 'PANCA') + } + + return(out) } @@ -350,7 +174,7 @@ check_input = function(x){ cli::cli_abort("Unrecogniseable gene names, will not proceed.") if(x$data$gene_role %>% class() != "character" | - setdiff(x$data$gene_role,c("TSG", "oncogene")) %>% length() != 0) + setdiff(x$data$gene_role, c("TSG", "oncogene", NA)) %>% length() != 0) cli::cli_abort("Unrecogniseable gene roles, will not proceed.") if(x$data$chr %>% class() != "character") @@ -410,3 +234,17 @@ add_per_state_probabilities = function(x, NV){ p_assign_all = list(probs)) }) %>% do.call(rbind, .) } + +remove_mutation = function(x, mutation_id){ + x$data = x$data %>% dplyr::filter(id != mutation_id) + if('fit' %in% names(x)) x$fit = x$fit %>% dplyr::filter(id != mutation_id) +} + +# Palettes + +ploidy_colors = CNAqc:::get_karyotypes_colors(c('1:0', '1:1', '2:0', '2:1', '2:2')) +ploidy_colors = ploidy_colors[c("1:0","1:1","2:1","2:2")] +names(ploidy_colors) = sapply(names(ploidy_colors), function(n){ + strsplit(n,split = ":")[[1]] %>% as.integer() %>% sum() +}) +ploidy_colors = c(ploidy_colors, "Tier-2" = 'gray') diff --git a/README.md b/README.md index 86d8d89..2f86e1f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ -[![R-CMD-check](https://github.com/caravagnalab/TAPACLOTH/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/caravagnalab/TAPACLOTH/actions/workflows/R-CMD-check.yaml) +[![R-CMD-check](https://github.com/caravagnalab/INCOMMON/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/caravagnalab/INCOMMON/actions/workflows/R-CMD-check.yaml) INCOMMON is a tool for the INference of COpy number and @@ -20,7 +20,7 @@ or with high ploidy and low multiplicity). #### Help and support -## [![](https://img.shields.io/badge/GitHub%20Pages-https://caravagnalab.github.io/TAPACLOTH/-yellow.svg)](https://caravagnalab.github.io/TAPACLOTH) +## [![](https://img.shields.io/badge/GitHub%20Pages-https://caravagnalab.github.io/INCOMMON/-yellow.svg)](https://caravagnalab.github.io/INCOMMON) ## Installation diff --git a/_pkgdown.yml b/_pkgdown.yml index 06262a5..1c1fbf9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: caravagnalab.github.io/TAPACLOTH +url: caravagnalab.github.io/INCOMMON template: bootstrap: 5 diff --git a/data/example_data.rda b/data/example_data.rda index 934ecd6..b189d43 100644 Binary files a/data/example_data.rda and b/data/example_data.rda differ diff --git a/data/pcawg_priors.rda b/data/pcawg_priors.rda new file mode 100644 index 0000000..b0e5a7a Binary files /dev/null and b/data/pcawg_priors.rda differ diff --git a/docs/404.html b/docs/404.html index 80af188..6d642c8 100644 --- a/docs/404.html +++ b/docs/404.html @@ -5,31 +5,31 @@ -Page not found (404) • TAPACLOTH - - - - - - - - - +Page not found (404) • INCOMMON + + + + + + + + + - - + + - Skip to contents + Skip to contents
@@ -90,7 +89,7 @@ diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 6f42aa5..1e0483e 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -1,5 +1,5 @@ -License • TAPACLOTHLicense • INCOMMON @@ -8,7 +8,7 @@
@@ -63,7 +62,7 @@
diff --git a/docs/LICENSE.html b/docs/LICENSE.html index d60a654..5a6cfb7 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -1,5 +1,5 @@ -GNU General Public License • TAPACLOTHGNU General Public License • INCOMMON @@ -8,7 +8,7 @@