From b08cc066048bd226c5850cc3cdb0f567a1af210a Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Tue, 21 Jan 2025 15:05:13 +0200 Subject: [PATCH 01/12] Polish mediation model metadata --- R/mediate.R | 59 ++++++++++++++++++++++++++++++----- man/getMediation.Rd | 20 +++++++----- tests/testthat/test-mediate.R | 10 +++--- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index daaef2d29..93c103834 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -35,7 +35,7 @@ #' (Default: \code{"holm"}) #' #' @param add.metadata \code{Logical scalar}. Should the model metadata be -#' returned. (Default: \code{FALSE}) +#' returned. (Default: \code{TRUE}) #' #' @param verbose \code{Logical scalar}. Should execution messages be printed. #' (Default: \code{TRUE}) @@ -61,7 +61,7 @@ #' \code{addMediation} returns an updated #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} #' instance with the same \code{data.frame} stored in the metadata as -#' "mediation". Its columns include: +#' "mediation" or as specified in the \code{name} argument. Its columns include: #' #' \describe{ #' \item{Mediator}{the mediator variable} @@ -70,6 +70,10 @@ #' \item{ACME_pval}{the adjusted p-value for the ACME estimate} #' \item{ADE_pval}{the adjusted p-value for the ADE estimate} #' } +#' +#' The original output of \code{\link[mediation:mediate]{mediate}} for each +#' analysed mediator is stored as the "model metadata" attribute of the +#' resulting \code{data.frame} and can be accessed via the \code{attr} function. #' #' @name getMediation #' @@ -97,11 +101,10 @@ #' covariates = c("sex", "age"), #' treat.value = "Scandinavia", #' control.value = "CentralEurope", -#' boot = TRUE, sims = 100, -#' add.metadata = TRUE) +#' boot = TRUE, sims = 100) #' #' # Visualise model statistics for 1st mediator -#' plot(attr(med_df, "metadata")[[1]]) +#' plotMediation(med_df) #' #' # Apply clr transformation to counts assay #' tse <- transformAssay(tse, @@ -145,6 +148,9 @@ #' head(metadata(tse)$reddim_mediation, 5) #' } #' +#' # Access model metadata +#' attr(metadata(tse)$reddim_mediation, "model metadata") +#' NULL #' @rdname getMediation @@ -154,7 +160,7 @@ setMethod("addMediation", signature = c(x = "SummarizedExperiment"), function(x, outcome, treatment, name = "mediation", mediator = NULL, assay.type = NULL, dimred = NULL, family = gaussian(), covariates = NULL, p.adj.method = "holm", - add.metadata = FALSE, verbose = TRUE, ...) { + add.metadata = TRUE, verbose = TRUE, ...) { med_df <- getMediation( x, outcome, treatment, @@ -176,7 +182,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), function(x, outcome, treatment, mediator = NULL, assay.type = NULL, dimred = NULL, family = gaussian(), covariates = NULL, p.adj.method = "holm", - add.metadata = FALSE, verbose = TRUE, ...) { + add.metadata = TRUE, verbose = TRUE, ...) { ###################### Input check ######################## if( !outcome %in% names(colData(x)) ){ stop(outcome, " not found in colData(x).", call. = FALSE) @@ -426,7 +432,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), if( add.metadata ){ # models for every mediator are saved into the metadata attribute - attr(med_df, "metadata") <- results[["Model"]] + med_df <- .add.model.metadata(med_df, results[["Model"]]) } # Order output dataframe by ACME p-values @@ -434,3 +440,40 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), return(med_df) } + +# Organise model metadata into data.frame +.add.model.metadata <- function(med_df, models) { + + # Create empty data.frame to store model metadata + metadata_df <- data.frame(matrix(nrow = nrow(med_df), ncol = 56)) + + # Use mediators and model properties as row and column names, respectively + rownames(metadata_df) <- med_df[["Mediator"]] + colnames(metadata_df) <- names(models[[1]]) + + # Iterate over mediators or rows + for( row in seq_len(length(models)) ){ + # Iterate over model metadata or columns + for( col in colnames(metadata_df) ){ + + entry <- models[[row]][[col]] + + # If entry is null, replace with empty string + if( is.null(entry) ){ + entry <- "" + } + # If entry is not a scalar, convert to list + if( length(entry) > 1){ + entry <- list(entry) + } + + # Store entry into data.frame + metadata_df[[row, col]] <- I(entry) + } + } + + # Add metadata to output as an attribute + attr(med_df, "model metadata") <- metadata_df + + return(med_df) +} \ No newline at end of file diff --git a/man/getMediation.Rd b/man/getMediation.Rd index ad2ff1269..679a357bd 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -22,7 +22,7 @@ getMediation(x, ...) family = gaussian(), covariates = NULL, p.adj.method = "holm", - add.metadata = FALSE, + add.metadata = TRUE, verbose = TRUE, ... ) @@ -37,7 +37,7 @@ getMediation(x, ...) family = gaussian(), covariates = NULL, p.adj.method = "holm", - add.metadata = FALSE, + add.metadata = TRUE, verbose = TRUE, ... ) @@ -80,7 +80,7 @@ of p-values. Passed to \code{p.adjust} function. (Default: \code{"holm"})} \item{add.metadata}{\code{Logical scalar}. Should the model metadata be -returned. (Default: \code{FALSE})} +returned. (Default: \code{TRUE})} \item{verbose}{\code{Logical scalar}. Should execution messages be printed. (Default: \code{TRUE})} @@ -91,7 +91,7 @@ effect sizes for the ACMEs and ADEs of every mediator given as input, whereas \code{addMediation} returns an updated \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} instance with the same \code{data.frame} stored in the metadata as -"mediation". Its columns include: +"mediation" or as specified in the \code{name} argument. Its columns include: \describe{ \item{Mediator}{the mediator variable} @@ -100,6 +100,10 @@ instance with the same \code{data.frame} stored in the metadata as \item{ACME_pval}{the adjusted p-value for the ACME estimate} \item{ADE_pval}{the adjusted p-value for the ADE estimate} } + +The original output of \code{\link[mediation:mediate]{mediate}} for each +analysed mediator is stored as the "model metadata" attribute of the +resulting \code{data.frame} and can be accessed via the \code{attr} function. } \description{ \code{getMediation} and \code{addMediation} provide a wrapper of @@ -139,11 +143,10 @@ med_df <- getMediation(tse, covariates = c("sex", "age"), treat.value = "Scandinavia", control.value = "CentralEurope", - boot = TRUE, sims = 100, - add.metadata = TRUE) + boot = TRUE, sims = 100) # Visualise model statistics for 1st mediator - plot(attr(med_df, "metadata")[[1]]) + plotMediation(med_df) # Apply clr transformation to counts assay tse <- transformAssay(tse, @@ -187,4 +190,7 @@ tse <- addMediation(tse, name = "reddim_mediation", head(metadata(tse)$reddim_mediation, 5) } +# Access model metadata +attr(metadata(tse)$reddim_mediation, "model metadata") + } diff --git a/tests/testthat/test-mediate.R b/tests/testthat/test-mediate.R index 4ce89c923..51930f239 100644 --- a/tests/testthat/test-mediate.R +++ b/tests/testthat/test-mediate.R @@ -31,7 +31,7 @@ test_that("getMediation", { expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", assay.type = "wrong_name"), - "wrong_name not found in assays(x).", fixed = TRUE + "'assay.type' must be a valid name of assays(x)", fixed = TRUE ) expect_error( @@ -69,10 +69,10 @@ test_that("getMediation", { treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 1) - expect_equal(attr(med_df, "metadata")[[1]]$d.avg, med_out$d.avg) - expect_equal(attr(med_df, "metadata")[[1]]$d.avg.p, med_out$d.avg.p) - expect_equal(attr(med_df, "metadata")[[1]]$z.avg, med_out$z.avg) - expect_equal(attr(med_df, "metadata")[[1]]$z.avg.p, med_out$z.avg.p) + expect_equal(attr(med_df, "model metadata")[[1, "d.avg"]], med_out$d.avg) + expect_equal(attr(med_df, "model metadata")[[1, "d.avg.p"]], med_out$d.avg.p) + expect_equal(attr(med_df, "model metadata")[[1, "z.avg"]], med_out$z.avg) + expect_equal(attr(med_df, "model metadata")[[1, "z.avg.p"]], med_out$z.avg.p) ### Batch 3: check output format and dimensionality with respect to SE ### med_df <- getMediation(tse, outcome = "bmi_group", treatment = "nationality", assay.type = "counts", From 2406685e2fe7a1f0e29123cef861434c5fe32aa3 Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Wed, 22 Jan 2025 11:12:18 +0200 Subject: [PATCH 02/12] Fix mediation example --- R/mediate.R | 6 ++++-- man/getMediation.Rd | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index 93c103834..ed1f2b454 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -81,6 +81,7 @@ #' \dontrun{ #' # Import libraries #' library(mia) +#' library(miaViz) #' library(scater) #' #' # Load dataset @@ -146,10 +147,11 @@ #' #' # Show results for first 5 mediators #' head(metadata(tse)$reddim_mediation, 5) -#' } +#' #' #' # Access model metadata #' attr(metadata(tse)$reddim_mediation, "model metadata") +#' } #' NULL @@ -476,4 +478,4 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), attr(med_df, "model metadata") <- metadata_df return(med_df) -} \ No newline at end of file +} diff --git a/man/getMediation.Rd b/man/getMediation.Rd index 679a357bd..388fed96b 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -123,6 +123,7 @@ Importantly, those three arguments are mutually exclusive. \dontrun{ # Import libraries library(mia) +library(miaViz) library(scater) # Load dataset @@ -188,9 +189,10 @@ tse <- addMediation(tse, name = "reddim_mediation", # Show results for first 5 mediators head(metadata(tse)$reddim_mediation, 5) -} + # Access model metadata attr(metadata(tse)$reddim_mediation, "model metadata") +} } From 67e0db8546d06574c53aa6617a7cf12c1036bad0 Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Thu, 23 Jan 2025 13:47:48 +0200 Subject: [PATCH 03/12] Streamline getMediation function --- R/mediate.R | 153 +++++++++++++++++----------------- man/getMediation.Rd | 17 +++- tests/testthat/test-mediate.R | 13 +-- 3 files changed, 99 insertions(+), 84 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index ed1f2b454..722bed04f 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -35,7 +35,10 @@ #' (Default: \code{"holm"}) #' #' @param add.metadata \code{Logical scalar}. Should the model metadata be -#' returned. (Default: \code{TRUE}) +#' returned. (Default: \code{TRUE}) +#' +#' @param sort \code{Logical scalar}. Should the results be sorted by decreasing +#' significance in terms of ACME_pval. (Default: \code{FALSE}) #' #' @param verbose \code{Logical scalar}. Should execution messages be printed. #' (Default: \code{TRUE}) @@ -67,12 +70,20 @@ #' \item{Mediator}{the mediator variable} #' \item{ACME_estimate}{the Average Causal Mediation Effect (ACME) estimate} #' \item{ADE_estimate}{the Average Direct Effect (ADE) estimate} +#' \item{Total_estimate}{the Total Effect estimate} #' \item{ACME_pval}{the adjusted p-value for the ACME estimate} #' \item{ADE_pval}{the adjusted p-value for the ADE estimate} +#' \item{Total_pval}{the adjusted p-value for the Total Effect estimate} +#' \item{ACME_CI_lower}{the 2.5% CI for the ACME estimate} +#' \item{ACME_CI_upper}{the 2.5% CI for the ACME estimate} +#' \item{ADE_CI_lower}{the 2.5% CI for the ADE estimate} +#' \item{ADE_CI_upper}{the 97.5% CI for the ADE estimate} +#' \item{Total_CI_lower}{the 2.5 CI for the Total Effect estimate} +#' \item{Total_CI_upper}{the 97.5 CI for the Total Effect estimate} #' } #' #' The original output of \code{\link[mediation:mediate]{mediate}} for each -#' analysed mediator is stored as the "model metadata" attribute of the +#' analysed mediator is stored as the "model_metadata" attribute of the #' resulting \code{data.frame} and can be accessed via the \code{attr} function. #' #' @name getMediation @@ -148,9 +159,8 @@ #' # Show results for first 5 mediators #' head(metadata(tse)$reddim_mediation, 5) #' -#' #' # Access model metadata -#' attr(metadata(tse)$reddim_mediation, "model metadata") +#' attr(metadata(tse)$reddim_mediation, "model_metadata") #' } #' NULL @@ -184,7 +194,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), function(x, outcome, treatment, mediator = NULL, assay.type = NULL, dimred = NULL, family = gaussian(), covariates = NULL, p.adj.method = "holm", - add.metadata = TRUE, verbose = TRUE, ...) { + add.metadata = TRUE, sort = FALSE, verbose = TRUE, ...) { ###################### Input check ######################## if( !outcome %in% names(colData(x)) ){ stop(outcome, " not found in colData(x).", call. = FALSE) @@ -203,7 +213,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), } # Check that arguments can be passed to mediate and filter samples - x <- .check.mediate.args( + x <- .check_mediate_args( x, outcome, treatment, mediator, covariates, verbose, ... ) @@ -250,14 +260,11 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), mediators <- rownames(mat) } - # Create template list of results - results <- list( - Mediator = c(), ACME_estimate = c(), ADE_estimate = c(), - ACME_pval = c(), ADE_pval = c(), Model = list() - ) - + # Create template list of models + models <- list() # Set initial index i <- 0 + for( mediator in mediators ){ # Update index i <- i + 1 @@ -272,18 +279,22 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), family = family, mat = mat, covariates = covariates, ... ) - # Update list of results - results <- .update.results(results, med_out, mediator) + # Update list of models + models <- c(models, list(med_out)) } + + # Name models by mediators + names(models) <- mediators # Combine results into dataframe - med_df <- .make.output(results, p.adj.method, add.metadata) + med_df <- .make_output(models, p.adj.method, add.metadata, sort) + return(med_df) } ) # Check that arguments can be passed to mediate and remove unused samples #' @importFrom stats na.omit -.check.mediate.args <- function( +.check_mediate_args <- function( x, outcome, treatment, mediator, covariates, verbose = TRUE, ...) { # Create dataframe from selected columns of colData @@ -352,6 +363,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), #' @importFrom stats lm formula glm .run_mediate <- function(x, outcome, treatment, mediator = NULL, mat = NULL, family = gaussian(), covariates = NULL, ...) { + # Create initial dataframe with outcome and treatment variables df <- data.frame( Outcome = colData(x)[[outcome]], Treatment = colData(x)[[treatment]]) @@ -397,69 +409,22 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), treat = "Treatment", mediator = "Mediator", covariates = covariates, ... ) - return(med_out) -} - -# Update list of results -.update.results <- function(results, med_out, mediator) { - # Update model variables - results[["Mediator"]] <- c(results[["Mediator"]], mediator) - # Update stats of ACME (average causal mediation effect) - results[["ACME_estimate"]] <- c(results[["ACME_estimate"]], med_out$d.avg) - results[["ACME_pval"]] <- c(results[["ACME_pval"]], med_out$d.avg.p) - # Update stats of ADE (average direct effect) - results[["ADE_estimate"]] <- c(results[["ADE_estimate"]], med_out$z.avg) - results[["ADE_pval"]] <- c(results[["ADE_pval"]], med_out$z.avg.p) - # Add current model to metadata - results[["Model"]][[length(results[["Model"]]) + 1]] <- med_out - return(results) + return(med_out) } # Combine results into output dataframe -.make.output <- function(results, p.adj.method, add.metadata) { - - # Create dataframe with model variables, effect sizes and p-values - med_df <- do.call(data.frame, results[seq_len(length(results) - 1)]) - - # Compute adjusted p-values and add them to dataframe - med_df[["ACME_pval"]] <- p.adjust( - med_df[["ACME_pval"]], - method = p.adj.method - ) - med_df[["ADE_pval"]] <- p.adjust( - med_df[["ADE_pval"]], - method = p.adj.method - ) - - if( add.metadata ){ - # models for every mediator are saved into the metadata attribute - med_df <- .add.model.metadata(med_df, results[["Model"]]) - } - - # Order output dataframe by ACME p-values - med_df <- med_df[order(med_df[["ACME_pval"]]), ] - - return(med_df) -} - -# Organise model metadata into data.frame -.add.model.metadata <- function(med_df, models) { - +.make_output <- function(models, p.adj.method, add.metadata, sort) { # Create empty data.frame to store model metadata - metadata_df <- data.frame(matrix(nrow = nrow(med_df), ncol = 56)) - + med_df <- data.frame(matrix(nrow = length(models), ncol = 56)) # Use mediators and model properties as row and column names, respectively - rownames(metadata_df) <- med_df[["Mediator"]] - colnames(metadata_df) <- names(models[[1]]) - + rownames(med_df) <- names(models) + colnames(med_df) <- names(models[[1]]) # Iterate over mediators or rows for( row in seq_len(length(models)) ){ # Iterate over model metadata or columns - for( col in colnames(metadata_df) ){ - + for( col in colnames(med_df) ){ entry <- models[[row]][[col]] - # If entry is null, replace with empty string if( is.null(entry) ){ entry <- "" @@ -468,14 +433,50 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), if( length(entry) > 1){ entry <- list(entry) } - # Store entry into data.frame - metadata_df[[row, col]] <- I(entry) + med_df[[row, col]] <- I(entry) } } - - # Add metadata to output as an attribute - attr(med_df, "model metadata") <- metadata_df - + + # Convert rownames to column + med_df["Mediator"] <- rownames(med_df) + rownames(med_df) <- NULL + # Select columns to keep + med_df <- med_df[ , c("Mediator", "d.avg", "z.avg", "tau.coef", "d.avg.p", + "z.avg.p", "tau.p", "d.avg.ci", "z.avg.ci", "tau.ci")] + # Rename columns + colnames(med_df) <- c("Mediator", "ACME_estimate", "ADE_estimate", + "Total_estimate", "ACME_pval", "ADE_pval", "Total_pval", "ACME_CI", + "ADE_CI", "Total_CI") + + # Compute adjusted p-values and add them to dataframe + pval_cols <- endsWith(colnames(med_df), "pval") + med_df[ , pval_cols] <- apply( + med_df[ , pval_cols], MARGIN = 2, + p.adjust, method = p.adj.method + ) + + # Find CI columns + ci_cols <- colnames(med_df)[endsWith(colnames(med_df), "CI")] + for( col in ci_cols ){ + # Retrieve CI columns + ci_list <- unlist(med_df[ , col]) + upper_cond <- seq_len(length(ci_list)) %% 2 == 0 + names(ci_list) <- NULL + # Split lower and upper CIs + med_df[ , paste(col, "lower", sep = "_")] <- ci_list[!upper_cond] + med_df[ , paste(col, "upper", sep = "_")] <- ci_list[upper_cond] + med_df[ , col] <- NULL + } + + if( add.metadata ){ + # Store model for each mediator into the model_metadata attribute + attr(med_df, "model_metadata") <- models + } + if( sort ){ + # Order output dataframe by ACME p-values + med_df <- med_df[order(med_df[["ACME_pval"]]), ] + } + return(med_df) -} +} \ No newline at end of file diff --git a/man/getMediation.Rd b/man/getMediation.Rd index 388fed96b..5459516c8 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -38,6 +38,7 @@ getMediation(x, ...) covariates = NULL, p.adj.method = "holm", add.metadata = TRUE, + sort = FALSE, verbose = TRUE, ... ) @@ -84,6 +85,9 @@ returned. (Default: \code{TRUE})} \item{verbose}{\code{Logical scalar}. Should execution messages be printed. (Default: \code{TRUE})} + +\item{sort}{\code{Logical scalar}. Should the results be sorted by decreasing +significance in terms of ACME_pval. (Default: \code{FALSE})} } \value{ \code{getMediation} returns a \code{data.frame} of adjusted p-values and @@ -97,12 +101,20 @@ instance with the same \code{data.frame} stored in the metadata as \item{Mediator}{the mediator variable} \item{ACME_estimate}{the Average Causal Mediation Effect (ACME) estimate} \item{ADE_estimate}{the Average Direct Effect (ADE) estimate} +\item{Total_estimate}{the Total Effect estimate} \item{ACME_pval}{the adjusted p-value for the ACME estimate} \item{ADE_pval}{the adjusted p-value for the ADE estimate} +\item{Total_pval}{the adjusted p-value for the Total Effect estimate} +\item{ACME_CI_lower}{the 2.5\% CI for the ACME estimate} +\item{ACME_CI_upper}{the 2.5\% CI for the ACME estimate} +\item{ADE_CI_lower}{the 2.5\% CI for the ADE estimate} +\item{ADE_CI_upper}{the 97.5\% CI for the ADE estimate} +\item{Total_CI_lower}{the 2.5 CI for the Total Effect estimate} +\item{Total_CI_upper}{the 97.5 CI for the Total Effect estimate} } The original output of \code{\link[mediation:mediate]{mediate}} for each -analysed mediator is stored as the "model metadata" attribute of the +analysed mediator is stored as the "model_metadata" attribute of the resulting \code{data.frame} and can be accessed via the \code{attr} function. } \description{ @@ -190,9 +202,8 @@ tse <- addMediation(tse, name = "reddim_mediation", # Show results for first 5 mediators head(metadata(tse)$reddim_mediation, 5) - # Access model metadata -attr(metadata(tse)$reddim_mediation, "model metadata") +attr(metadata(tse)$reddim_mediation, "model_metadata") } } diff --git a/tests/testthat/test-mediate.R b/tests/testthat/test-mediate.R index 51930f239..24e03a1dc 100644 --- a/tests/testthat/test-mediate.R +++ b/tests/testthat/test-mediate.R @@ -69,10 +69,10 @@ test_that("getMediation", { treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 1) - expect_equal(attr(med_df, "model metadata")[[1, "d.avg"]], med_out$d.avg) - expect_equal(attr(med_df, "model metadata")[[1, "d.avg.p"]], med_out$d.avg.p) - expect_equal(attr(med_df, "model metadata")[[1, "z.avg"]], med_out$z.avg) - expect_equal(attr(med_df, "model metadata")[[1, "z.avg.p"]], med_out$z.avg.p) + expect_equal(attr(med_df, "model_metadata")[[1]][["d.avg"]], med_out$d.avg) + expect_equal(attr(med_df, "model_metadata")[[1]][["d.avg.p"]], med_out$d.avg.p) + expect_equal(attr(med_df, "model_metadata")[[1]][["z.avg"]], med_out$z.avg) + expect_equal(attr(med_df, "model_metadata")[[1]][["z.avg.p"]], med_out$z.avg.p) ### Batch 3: check output format and dimensionality with respect to SE ### med_df <- getMediation(tse, outcome = "bmi_group", treatment = "nationality", assay.type = "counts", @@ -81,6 +81,9 @@ test_that("getMediation", { expect_named(tse, med_df[["Mediator"]]) - expect_named(med_df, c("Mediator", "ACME_estimate", "ADE_estimate", "ACME_pval", "ADE_pval")) + expect_named(med_df, c("Mediator", "ACME_estimate", "ADE_estimate", + "Total_estimate", "ACME_pval", "ADE_pval", "Total_pval", "ACME_CI_lower", + "ACME_CI_upper", "ADE_CI_lower", "ADE_CI_upper", "Total_CI_lower", + "Total_CI_upper")) }) From 16befb42805e02ebadbb136e69beca7275990682 Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Thu, 23 Jan 2025 18:03:35 +0200 Subject: [PATCH 04/12] Update getMediation output names --- R/mediate.R | 26 +++++++++++++------------- man/getMediation.Rd | 12 ++++++------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index 722bed04f..705bc6e06 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -74,12 +74,12 @@ #' \item{ACME_pval}{the adjusted p-value for the ACME estimate} #' \item{ADE_pval}{the adjusted p-value for the ADE estimate} #' \item{Total_pval}{the adjusted p-value for the Total Effect estimate} -#' \item{ACME_CI_lower}{the 2.5% CI for the ACME estimate} -#' \item{ACME_CI_upper}{the 2.5% CI for the ACME estimate} -#' \item{ADE_CI_lower}{the 2.5% CI for the ADE estimate} -#' \item{ADE_CI_upper}{the 97.5% CI for the ADE estimate} -#' \item{Total_CI_lower}{the 2.5 CI for the Total Effect estimate} -#' \item{Total_CI_upper}{the 97.5 CI for the Total Effect estimate} +#' \item{ACME_lower}{the 2.5% CI for the ACME estimate} +#' \item{ACME_upper}{the 2.5% CI for the ACME estimate} +#' \item{ADE_lower}{the 2.5% CI for the ADE estimate} +#' \item{ADE_upper}{the 97.5% CI for the ADE estimate} +#' \item{Total_lower}{the 2.5 CI for the Total Effect estimate} +#' \item{Total_upper}{the 97.5 CI for the Total Effect estimate} #' } #' #' The original output of \code{\link[mediation:mediate]{mediate}} for each @@ -456,17 +456,17 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), p.adjust, method = p.adj.method ) - # Find CI columns - ci_cols <- colnames(med_df)[endsWith(colnames(med_df), "CI")] - for( col in ci_cols ){ + estimates <- c("ACME", "ADE", "Total") + for( est in estimates ){ # Retrieve CI columns - ci_list <- unlist(med_df[ , col]) + ci_col <- paste(est, "CI", sep = "_") + ci_list <- unlist(med_df[ , ci_col]) upper_cond <- seq_len(length(ci_list)) %% 2 == 0 names(ci_list) <- NULL # Split lower and upper CIs - med_df[ , paste(col, "lower", sep = "_")] <- ci_list[!upper_cond] - med_df[ , paste(col, "upper", sep = "_")] <- ci_list[upper_cond] - med_df[ , col] <- NULL + med_df[ , paste(est, "lower", sep = "_")] <- ci_list[!upper_cond] + med_df[ , paste(est, "upper", sep = "_")] <- ci_list[upper_cond] + med_df[ , ci_col] <- NULL } if( add.metadata ){ diff --git a/man/getMediation.Rd b/man/getMediation.Rd index 5459516c8..41394945c 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -105,12 +105,12 @@ instance with the same \code{data.frame} stored in the metadata as \item{ACME_pval}{the adjusted p-value for the ACME estimate} \item{ADE_pval}{the adjusted p-value for the ADE estimate} \item{Total_pval}{the adjusted p-value for the Total Effect estimate} -\item{ACME_CI_lower}{the 2.5\% CI for the ACME estimate} -\item{ACME_CI_upper}{the 2.5\% CI for the ACME estimate} -\item{ADE_CI_lower}{the 2.5\% CI for the ADE estimate} -\item{ADE_CI_upper}{the 97.5\% CI for the ADE estimate} -\item{Total_CI_lower}{the 2.5 CI for the Total Effect estimate} -\item{Total_CI_upper}{the 97.5 CI for the Total Effect estimate} +\item{ACME_lower}{the 2.5\% CI for the ACME estimate} +\item{ACME_upper}{the 2.5\% CI for the ACME estimate} +\item{ADE_lower}{the 2.5\% CI for the ADE estimate} +\item{ADE_upper}{the 97.5\% CI for the ADE estimate} +\item{Total_lower}{the 2.5 CI for the Total Effect estimate} +\item{Total_upper}{the 97.5 CI for the Total Effect estimate} } The original output of \code{\link[mediation:mediate]{mediate}} for each From 8d27d8b770b9f20f7e3a2d337ea174fd580d0a53 Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Thu, 23 Jan 2025 18:19:26 +0200 Subject: [PATCH 05/12] Fix test-mediate.R --- tests/testthat/test-mediate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-mediate.R b/tests/testthat/test-mediate.R index 24e03a1dc..cf0c3d600 100644 --- a/tests/testthat/test-mediate.R +++ b/tests/testthat/test-mediate.R @@ -82,8 +82,8 @@ test_that("getMediation", { expect_named(tse, med_df[["Mediator"]]) expect_named(med_df, c("Mediator", "ACME_estimate", "ADE_estimate", - "Total_estimate", "ACME_pval", "ADE_pval", "Total_pval", "ACME_CI_lower", - "ACME_CI_upper", "ADE_CI_lower", "ADE_CI_upper", "Total_CI_lower", - "Total_CI_upper")) + "Total_estimate", "ACME_pval", "ADE_pval", "Total_pval", "ACME_lower", + "ACME_upper", "ADE_lower", "ADE_upper", "Total_lower", + "Total_upper")) }) From 370966dc38650872a077b3d341930aa12b4800d4 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Fri, 24 Jan 2025 15:29:43 +0200 Subject: [PATCH 06/12] up --- R/mediate.R | 202 +++++++++++++++++++++++----------------------------- 1 file changed, 90 insertions(+), 112 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index 705bc6e06..474f3943c 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -3,13 +3,13 @@ #' \code{getMediation} and \code{addMediation} provide a wrapper of #' \code{\link[mediation:mediate]{mediate}} for #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}. -#' +#' #' @param x a #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}. -#' +#' #' @param outcome \code{Character scalar}. Indicates the colData variable used #' as outcome in the model. -#' +#' #' @param treatment \code{Character scalar}. Indicates the colData variable #' used as treatment in the model. #' @@ -18,37 +18,37 @@ #' #' @param assay.type \code{Character scalar}. Specifies the assay used for #' feature-wise mediation analysis. (Default: \code{NULL}) -#' +#' #' @param dimred \code{Character scalar}. Indicates the reduced dimension #' result in \code{reducedDims(object)} for component-wise mediation analysis. #' (Default: \code{NULL}) #' #' @param family \code{Character scalar}. A specification for the outcome model #' link function. (Default: \code{gaussian("identity")}) -#' +#' #' @param covariates \code{Character scalar} or \code{character vector}. #' Indicates the colData variables used as covariates in the model. #' (Default: \code{NULL}) -#' +#' #' @param p.adj.method \code{Character scalar}. Selects adjustment method #' of p-values. Passed to `p.adjust` function. #' (Default: \code{"holm"}) -#' +#' #' @param add.metadata \code{Logical scalar}. Should the model metadata be #' returned. (Default: \code{TRUE}) -#' +#' #' @param sort \code{Logical scalar}. Should the results be sorted by decreasing #' significance in terms of ACME_pval. (Default: \code{FALSE}) -#' +#' #' @param verbose \code{Logical scalar}. Should execution messages be printed. #' (Default: \code{TRUE}) -#' -#' @param name \code{Character scalar}. A name for the column of the +#' +#' @param name \code{Character scalar}. A name for the column of the #' \code{colData} where results will be stored. (Default: \code{"mediation"}) -#' +#' #' @param ... additional parameters that can be passed to #' \code{\link[mediation:mediate]{mediate}}. -#' +#' #' @details #' This wrapper of \code{\link[mediation:mediate]{mediate}} for #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} @@ -57,15 +57,15 @@ #' another variable in colData (argument \code{mediator}) or an assay #' (argument \code{assay.type}) or a reducedDim (argument \code{dimred}). #' Importantly, those three arguments are mutually exclusive. -#' -#' @return +#' +#' @return #' \code{getMediation} returns a \code{data.frame} of adjusted p-values and #' effect sizes for the ACMEs and ADEs of every mediator given as input, whereas #' \code{addMediation} returns an updated #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} #' instance with the same \code{data.frame} stored in the metadata as #' "mediation" or as specified in the \code{name} argument. Its columns include: -#' +#' #' \describe{ #' \item{Mediator}{the mediator variable} #' \item{ACME_estimate}{the Average Causal Mediation Effect (ACME) estimate} @@ -81,7 +81,7 @@ #' \item{Total_lower}{the 2.5 CI for the Total Effect estimate} #' \item{Total_upper}{the 97.5 CI for the Total Effect estimate} #' } -#' +#' #' The original output of \code{\link[mediation:mediate]{mediate}} for each #' analysed mediator is stored as the "model_metadata" attribute of the #' resulting \code{data.frame} and can be accessed via the \code{attr} function. @@ -94,16 +94,16 @@ #' library(mia) #' library(miaViz) #' library(scater) -#' +#' #' # Load dataset #' data(hitchip1006, package = "miaTime") #' tse <- hitchip1006 -#' +#' #' # Agglomerate features by family (merely to speed up execution) #' tse <- agglomerateByRank(tse, rank = "Phylum") #' # Convert BMI variable to numeric #' tse$bmi_group <- as.numeric(tse$bmi_group) -#' +#' #' # Analyse mediated effect of nationality on BMI via alpha diversity #' # 100 permutations were done to speed up execution, but ~1000 are recommended #' med_df <- getMediation(tse, @@ -114,10 +114,10 @@ #' treat.value = "Scandinavia", #' control.value = "CentralEurope", #' boot = TRUE, sims = 100) -#' +#' #' # Visualise model statistics for 1st mediator #' plotMediation(med_df) -#' +#' #' # Apply clr transformation to counts assay #' tse <- transformAssay(tse, #' method = "clr", @@ -134,16 +134,16 @@ #' control.value = "CentralEurope", #' boot = TRUE, sims = 100, #' p.adj.method = "fdr") -#' +#' #' # Show results for first 5 mediators #' head(metadata(tse)$assay_mediation, 5) -#' +#' #' # Perform ordination #' tse <- runMDS(tse, name = "MDS", #' method = "euclidean", #' assay.type = "clr", #' ncomponents = 3) -#' +#' #' # Analyse mediated effect of nationality on BMI via NMDS components #' # 100 permutations were done to speed up execution, but ~1000 are recommended #' tse <- addMediation(tse, name = "reddim_mediation", @@ -155,14 +155,14 @@ #' control.value = "CentralEurope", #' boot = TRUE, sims = 100, #' p.adj.method = "fdr") -#' +#' #' # Show results for first 5 mediators #' head(metadata(tse)$reddim_mediation, 5) -#' +#' #' # Access model metadata #' attr(metadata(tse)$reddim_mediation, "model_metadata") #' } -#' +#' NULL #' @rdname getMediation @@ -173,14 +173,14 @@ setMethod("addMediation", signature = c(x = "SummarizedExperiment"), mediator = NULL, assay.type = NULL, dimred = NULL, family = gaussian(), covariates = NULL, p.adj.method = "holm", add.metadata = TRUE, verbose = TRUE, ...) { - + med_df <- getMediation( x, outcome, treatment, mediator, assay.type, dimred, family, covariates, p.adj.method, add.metadata, verbose, ... ) - + x <- .add_values_to_metadata(x, name, med_df) return(x) } @@ -211,18 +211,18 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), if( !.is_a_bool(verbose) ){ stop("verbose must be TRUE or FALSE.", call. = FALSE) } - + # Check that arguments can be passed to mediate and filter samples x <- .check_mediate_args( x, outcome, treatment, mediator, covariates, verbose, ... ) - + # Check which mediator was provided (colData, assay or reducedDim) med_opts <- unlist(lapply( list(mediator = mediator, assay.type = assay.type, dimred = dimred), function(x) !is.null(x) )) - + if( sum(med_opts) != 1 ){ # Throw error if none or multiple mediation options are specified stop( @@ -231,13 +231,13 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), call. = FALSE ) } - + if ( med_opts[[1]] ){ # Check that mediator is in colData if( !mediator %in% names(colData(x)) ) { stop(mediator, " not found in colData(x).", call. = FALSE) } - # Use mediator for analysis + # Use mediator for analysis mediators <- mediator mat <- NULL } else if( med_opts[[2]] ){ @@ -259,14 +259,14 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), # Use reducedDim for analysis mediators <- rownames(mat) } - + # Create template list of models models <- list() - # Set initial index + # Set initial index i <- 0 - + for( mediator in mediators ){ - # Update index + # Update index i <- i + 1 if( verbose ){ message("\rMediator ", i, " out of ", @@ -282,12 +282,12 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), # Update list of models models <- c(models, list(med_out)) } - + # Name models by mediators names(models) <- mediators # Combine results into dataframe med_df <- .make_output(models, p.adj.method, add.metadata, sort) - + return(med_df) } ) @@ -296,30 +296,30 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), #' @importFrom stats na.omit .check_mediate_args <- function( x, outcome, treatment, mediator, covariates, verbose = TRUE, ...) { - + # Create dataframe from selected columns of colData df <- as.data.frame(colData(x)[ , names(colData(x)) %in% c( outcome, treatment, mediator, covariates)]) # Store kwargs into variable kwargs <- list(...) - + # Remove missing data from df df <- na.omit(df) diff <- ncol(x) - nrow(df) - + if( diff != 0 ){ # Remove missing data from se x <- x[ , rownames(df)] - + if( verbose ){ message(diff, " samples removed because of missing data.") } } - + # If boot is TRUE and treatment variable is discrete and has 3+ levels if( !is.null(kwargs[["boot"]]) && !is.numeric(df[[treatment]]) && length(unique((df[[treatment]]))) > 2 ) { - + ## if control and treat value are not specified if( any(vapply(kwargs[c("control.value", "treat.value")], is.null, logical(1))) ){ @@ -337,17 +337,17 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), "the treatment variable.", call. = FALSE ) } - + # Find indices of samples that belong to either control or treatment keep <- df[[treatment]] %in% kwargs[c("control.value", "treat.value")] - + # Remove samples different from control and treatment from df df <- df[keep, ] diff <- ncol(x) - nrow(df) - + # Remove samples different from control and treatment from se x <- x[ , rownames(df)] - + if( verbose ){ message( diff, " samples removed because different ", @@ -367,7 +367,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), # Create initial dataframe with outcome and treatment variables df <- data.frame( Outcome = colData(x)[[outcome]], Treatment = colData(x)[[treatment]]) - + if( is.null(mat) ){ # If matrix not given, fetch mediator from colData df[["Mediator"]] <- colData(x)[[mediator]] @@ -375,12 +375,12 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), # If matrix given, use it as mediators df[["Mediator"]] <- mat[mediator, ] } - + # Define basic formula mediation model relation_m <- "Mediator ~ Treatment" # Define basic formula outcome model relation_dv <- "Outcome ~ Treatment + Mediator" - + if( !is.null(covariates) ){ # Fetch covariates from colData and store them in dataframe df <- cbind(df, colData(x)[covariates]) @@ -414,69 +414,47 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), } # Combine results into output dataframe +#' @importFrom dplyr select +#' @importFrom tidyr function starts_with ends_with unnest_wider +#' @importFrom stringr str_extract str_replace_all .make_output <- function(models, p.adj.method, add.metadata, sort) { - # Create empty data.frame to store model metadata - med_df <- data.frame(matrix(nrow = length(models), ncol = 56)) - # Use mediators and model properties as row and column names, respectively - rownames(med_df) <- names(models) - colnames(med_df) <- names(models[[1]]) - # Iterate over mediators or rows - for( row in seq_len(length(models)) ){ - # Iterate over model metadata or columns - for( col in colnames(med_df) ){ - entry <- models[[row]][[col]] - # If entry is null, replace with empty string - if( is.null(entry) ){ - entry <- "" - } - # If entry is not a scalar, convert to list - if( length(entry) > 1){ - entry <- list(entry) - } - # Store entry into data.frame - med_df[[row, col]] <- I(entry) - } - } - - # Convert rownames to column - med_df["Mediator"] <- rownames(med_df) - rownames(med_df) <- NULL - # Select columns to keep - med_df <- med_df[ , c("Mediator", "d.avg", "z.avg", "tau.coef", "d.avg.p", - "z.avg.p", "tau.p", "d.avg.ci", "z.avg.ci", "tau.ci")] - # Rename columns - colnames(med_df) <- c("Mediator", "ACME_estimate", "ADE_estimate", - "Total_estimate", "ACME_pval", "ADE_pval", "Total_pval", "ACME_CI", - "ADE_CI", "Total_CI") + # Combine results + res <- do.call(rbind, models) |> as.data.frame() + # Select certain data types + res <- res |> + select(tidyr::starts_with(c("d.avg", "z.avg", "tau"))) |> + select(-tidyr::ends_with(c("sims"))) + # Get columns with scalar values and turn them into vector instead of list + cols <- vapply(res, function(col) all(lengths(col) == 1L), logical(1L)) + res[, cols] <- lapply(res[, cols], unlist) + # Unnest confidence interval columns + res <- res |> unnest_wider(tidyr::ends_with(".ci"), names_sep = "_") + # Replace confidence interval values with "lower" and "upper" + limits <- unique(str_extract(colnames(res), "([0-9.]+)%")) + limits <- limits[!is.na(limits)] + limits <- limits[order(as.numeric(gsub("%", "", limits)))] + lookup <- c("lower", "upper") + names(lookup) <- limits + colnames(res) <- str_replace_all(colnames(res), lookup) + # Tidy other names also + lookup <- c("d.avg" = "ACME", "z.avg" = "ADE", "tau" = "Total", + "\\.p" = "_pval", "\\.ci_" = "_") + colnames(res) <- str_replace_all(colnames(res), lookup) # Compute adjusted p-values and add them to dataframe - pval_cols <- endsWith(colnames(med_df), "pval") - med_df[ , pval_cols] <- apply( - med_df[ , pval_cols], MARGIN = 2, - p.adjust, method = p.adj.method - ) - - estimates <- c("ACME", "ADE", "Total") - for( est in estimates ){ - # Retrieve CI columns - ci_col <- paste(est, "CI", sep = "_") - ci_list <- unlist(med_df[ , ci_col]) - upper_cond <- seq_len(length(ci_list)) %% 2 == 0 - names(ci_list) <- NULL - # Split lower and upper CIs - med_df[ , paste(est, "lower", sep = "_")] <- ci_list[!upper_cond] - med_df[ , paste(est, "upper", sep = "_")] <- ci_list[upper_cond] - med_df[ , ci_col] <- NULL - } - + cols <- endsWith(colnames(res), "pval") + pcols <- lapply(res[ , cols], p.adjust, method = p.adj.method) + pcols <- do.call(cbind, pcols) + colnames(pcols) <- gsub("pval", "padj", colnames(pcols)) + res <- cbind(res, pcols) + + # Store model for each mediator into the model_metadata attribute if( add.metadata ){ - # Store model for each mediator into the model_metadata attribute - attr(med_df, "model_metadata") <- models + attr(res, "model_metadata") <- models } + # Order output dataframe by ACME p-values if( sort ){ - # Order output dataframe by ACME p-values - med_df <- med_df[order(med_df[["ACME_pval"]]), ] + res <- res[order(res[["ACME_padj"]]), ] } - - return(med_df) + return(res) } \ No newline at end of file From 69015f3eb2f76f9950a512185a986ffc1ebf0c8a Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Fri, 24 Jan 2025 15:33:40 +0200 Subject: [PATCH 07/12] up --- NAMESPACE | 6 ++++++ R/mediate.R | 4 ++-- man/getMediation.Rd | 6 +++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 89f8f960e..6ea14b7fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -400,6 +400,7 @@ importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,n) importFrom(dplyr,rename) +importFrom(dplyr,select) importFrom(dplyr,sym) importFrom(dplyr,tally) importFrom(mediation,mediate) @@ -428,10 +429,15 @@ importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,terms) +importFrom(stringr,str_extract) +importFrom(stringr,str_replace_all) importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) +importFrom(tidyr,ends_with) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) +importFrom(tidyr,starts_with) +importFrom(tidyr,unnest_wider) importFrom(utils,assignInMyNamespace) importFrom(utils,combn) importFrom(utils,getFromNamespace) diff --git a/R/mediate.R b/R/mediate.R index 474f3943c..65fbffabb 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -422,8 +422,8 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), res <- do.call(rbind, models) |> as.data.frame() # Select certain data types res <- res |> - select(tidyr::starts_with(c("d.avg", "z.avg", "tau"))) |> - select(-tidyr::ends_with(c("sims"))) + select(starts_with(c("d.avg", "z.avg", "tau"))) |> + select(-ends_with(c("sims"))) # Get columns with scalar values and turn them into vector instead of list cols <- vapply(res, function(col) all(lengths(col) == 1L), logical(1L)) res[, cols] <- lapply(res[, cols], unlist) diff --git a/man/getMediation.Rd b/man/getMediation.Rd index 41394945c..d1880cee8 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -141,7 +141,7 @@ library(scater) # Load dataset data(hitchip1006, package = "miaTime") tse <- hitchip1006 - + # Agglomerate features by family (merely to speed up execution) tse <- agglomerateByRank(tse, rank = "Phylum") # Convert BMI variable to numeric @@ -157,7 +157,7 @@ med_df <- getMediation(tse, treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 100) - + # Visualise model statistics for 1st mediator plotMediation(med_df) @@ -180,7 +180,7 @@ tse <- addMediation(tse, name = "assay_mediation", # Show results for first 5 mediators head(metadata(tse)$assay_mediation, 5) - + # Perform ordination tse <- runMDS(tse, name = "MDS", method = "euclidean", From 1ce44d29f181cf3006591ee83a7d7f6bce50fb01 Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Sun, 26 Jan 2025 19:06:26 +0200 Subject: [PATCH 08/12] Finalise getMediation --- DESCRIPTION | 4 ++-- NAMESPACE | 2 +- R/mediate.R | 32 ++++++++++++++++++-------------- man/getMediation.Rd | 27 +++++++++++++++------------ 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32b27e33e..cdeb9fdf1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.15.18 +Version: 1.15.19 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", @@ -83,6 +83,7 @@ Imports: scater, scuttle, stats, + stringr, tibble, tidyr, utils, @@ -102,7 +103,6 @@ Suggests: reldist, rhdf5, rmarkdown, - stringr, testthat, topicdoc, topicmodels, diff --git a/NAMESPACE b/NAMESPACE index 5431129d2..b89a09558 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -438,8 +438,8 @@ importFrom(tidyr,ends_with) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) importFrom(tidyr,starts_with) -importFrom(tidyr,unnest_wider) importFrom(tidyr,unnest) +importFrom(tidyr,unnest_wider) importFrom(utils,assignInMyNamespace) importFrom(utils,combn) importFrom(utils,getFromNamespace) diff --git a/R/mediate.R b/R/mediate.R index 65fbffabb..987ace221 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -68,18 +68,21 @@ #' #' \describe{ #' \item{Mediator}{the mediator variable} -#' \item{ACME_estimate}{the Average Causal Mediation Effect (ACME) estimate} -#' \item{ADE_estimate}{the Average Direct Effect (ADE) estimate} -#' \item{Total_estimate}{the Total Effect estimate} -#' \item{ACME_pval}{the adjusted p-value for the ACME estimate} -#' \item{ADE_pval}{the adjusted p-value for the ADE estimate} -#' \item{Total_pval}{the adjusted p-value for the Total Effect estimate} -#' \item{ACME_lower}{the 2.5% CI for the ACME estimate} -#' \item{ACME_upper}{the 2.5% CI for the ACME estimate} -#' \item{ADE_lower}{the 2.5% CI for the ADE estimate} -#' \item{ADE_upper}{the 97.5% CI for the ADE estimate} -#' \item{Total_lower}{the 2.5 CI for the Total Effect estimate} -#' \item{Total_upper}{the 97.5 CI for the Total Effect estimate} +#' \item{ACME}{the Average Causal Mediation Effect (ACME) estimate} +#' \item{ACME_pval}{the original p-value for the ACME estimate} +#' \item{ACME_lower}{the lower bound of the CI for the ACME estimate} +#' \item{ACME_upper}{the upper bound of the CI for the ACME estimate} +#' \item{ADE}{the Average Direct Effect (ADE) estimate} +#' \item{ADE_pval}{the original p-value for the ADE estimate} +#' \item{ADE_lower}{the lower bound of the CI for the ADE estimate} +#' \item{ADE_upper}{the upper bound of the CI for the ADE estimate} +#' \item{Total.coef}{the Total Effect estimate} +#' \item{Total_pval}{the original p-value for the Total Effect estimate} +#' \item{Total_lower}{the lower bound of the CI for the Total Effect estimate} +#' \item{Total_upper}{the upper bound of the CI for the Total Effect estimate} +#' \item{ACME_padj}{the adjusted p-value for the ACME estimate} +#' \item{ADE_padj}{the adjusted p-value for the ADE estimate} +#' \item{Total_padj}{the adjusted p-value for the Total Effect estimate} #' } #' #' The original output of \code{\link[mediation:mediate]{mediate}} for each @@ -415,14 +418,15 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), # Combine results into output dataframe #' @importFrom dplyr select -#' @importFrom tidyr function starts_with ends_with unnest_wider +#' @importFrom tidyr starts_with ends_with unnest_wider #' @importFrom stringr str_extract str_replace_all .make_output <- function(models, p.adj.method, add.metadata, sort) { # Combine results res <- do.call(rbind, models) |> as.data.frame() + res[["Mediator"]] <- names(models) # Select certain data types res <- res |> - select(starts_with(c("d.avg", "z.avg", "tau"))) |> + select(Mediator, starts_with(c("d.avg", "z.avg", "tau"))) |> select(-ends_with(c("sims"))) # Get columns with scalar values and turn them into vector instead of list cols <- vapply(res, function(col) all(lengths(col) == 1L), logical(1L)) diff --git a/man/getMediation.Rd b/man/getMediation.Rd index d1880cee8..df8693ceb 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -99,18 +99,21 @@ instance with the same \code{data.frame} stored in the metadata as \describe{ \item{Mediator}{the mediator variable} -\item{ACME_estimate}{the Average Causal Mediation Effect (ACME) estimate} -\item{ADE_estimate}{the Average Direct Effect (ADE) estimate} -\item{Total_estimate}{the Total Effect estimate} -\item{ACME_pval}{the adjusted p-value for the ACME estimate} -\item{ADE_pval}{the adjusted p-value for the ADE estimate} -\item{Total_pval}{the adjusted p-value for the Total Effect estimate} -\item{ACME_lower}{the 2.5\% CI for the ACME estimate} -\item{ACME_upper}{the 2.5\% CI for the ACME estimate} -\item{ADE_lower}{the 2.5\% CI for the ADE estimate} -\item{ADE_upper}{the 97.5\% CI for the ADE estimate} -\item{Total_lower}{the 2.5 CI for the Total Effect estimate} -\item{Total_upper}{the 97.5 CI for the Total Effect estimate} +\item{ACME}{the Average Causal Mediation Effect (ACME) estimate} +\item{ACME_pval}{the original p-value for the ACME estimate} +\item{ACME_lower}{the lower bound of the CI for the ACME estimate} +\item{ACME_upper}{the upper bound of the CI for the ACME estimate} +\item{ADE}{the Average Direct Effect (ADE) estimate} +\item{ADE_pval}{the original p-value for the ADE estimate} +\item{ADE_lower}{the lower bound of the CI for the ADE estimate} +\item{ADE_upper}{the upper bound of the CI for the ADE estimate} +\item{Total.coef}{the Total Effect estimate} +\item{Total_pval}{the original p-value for the Total Effect estimate} +\item{Total_lower}{the lower bound of the CI for the Total Effect estimate} +\item{Total_upper}{the upper bound of the CI for the Total Effect estimate} +\item{ACME_padj}{the adjusted p-value for the ACME estimate} +\item{ADE_padj}{the adjusted p-value for the ADE estimate} +\item{Total_padj}{the adjusted p-value for the Total Effect estimate} } The original output of \code{\link[mediation:mediate]{mediate}} for each From b220be2675c2e5d8e198e2f654b59a0aafff9311 Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Sun, 26 Jan 2025 19:18:54 +0200 Subject: [PATCH 09/12] Fix tests for getMediation --- R/mediate.R | 2 +- man/getMediation.Rd | 2 +- tests/testthat/test-mediate.R | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index 987ace221..e0e7d1605 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -77,9 +77,9 @@ #' \item{ADE_lower}{the lower bound of the CI for the ADE estimate} #' \item{ADE_upper}{the upper bound of the CI for the ADE estimate} #' \item{Total.coef}{the Total Effect estimate} -#' \item{Total_pval}{the original p-value for the Total Effect estimate} #' \item{Total_lower}{the lower bound of the CI for the Total Effect estimate} #' \item{Total_upper}{the upper bound of the CI for the Total Effect estimate} +#' \item{Total_pval}{the original p-value for the Total Effect estimate} #' \item{ACME_padj}{the adjusted p-value for the ACME estimate} #' \item{ADE_padj}{the adjusted p-value for the ADE estimate} #' \item{Total_padj}{the adjusted p-value for the Total Effect estimate} diff --git a/man/getMediation.Rd b/man/getMediation.Rd index df8693ceb..ec71ae01d 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -108,9 +108,9 @@ instance with the same \code{data.frame} stored in the metadata as \item{ADE_lower}{the lower bound of the CI for the ADE estimate} \item{ADE_upper}{the upper bound of the CI for the ADE estimate} \item{Total.coef}{the Total Effect estimate} -\item{Total_pval}{the original p-value for the Total Effect estimate} \item{Total_lower}{the lower bound of the CI for the Total Effect estimate} \item{Total_upper}{the upper bound of the CI for the Total Effect estimate} +\item{Total_pval}{the original p-value for the Total Effect estimate} \item{ACME_padj}{the adjusted p-value for the ACME estimate} \item{ADE_padj}{the adjusted p-value for the ADE estimate} \item{Total_padj}{the adjusted p-value for the Total Effect estimate} diff --git a/tests/testthat/test-mediate.R b/tests/testthat/test-mediate.R index cf0c3d600..0e5349f9a 100644 --- a/tests/testthat/test-mediate.R +++ b/tests/testthat/test-mediate.R @@ -81,9 +81,8 @@ test_that("getMediation", { expect_named(tse, med_df[["Mediator"]]) - expect_named(med_df, c("Mediator", "ACME_estimate", "ADE_estimate", - "Total_estimate", "ACME_pval", "ADE_pval", "Total_pval", "ACME_lower", - "ACME_upper", "ADE_lower", "ADE_upper", "Total_lower", - "Total_upper")) - + expect_named(med_df, c("Mediator", "ACME", "ACME_pval", "ACME_lower", + "ACME_upper", "ADE", "ADE_pval", "ADE_lower", "ADE_upper", "Total.coef", + "Total_lower", "Total_upper", "Total_pval", "ACME_padj", "ADE_padj", + "Total_padj")) }) From b666b64182ca5cdfcc4fa251732b9ce01f7cdb4c Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Mon, 27 Jan 2025 14:32:29 +0200 Subject: [PATCH 10/12] Standardise mediation colnames --- R/mediate.R | 40 +++++++++++++++++------------------ man/getMediation.Rd | 32 ++++++++++++++-------------- tests/testthat/test-mediate.R | 10 ++++----- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/mediate.R b/R/mediate.R index e0e7d1605..ae510c545 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -67,22 +67,22 @@ #' "mediation" or as specified in the \code{name} argument. Its columns include: #' #' \describe{ -#' \item{Mediator}{the mediator variable} -#' \item{ACME}{the Average Causal Mediation Effect (ACME) estimate} -#' \item{ACME_pval}{the original p-value for the ACME estimate} -#' \item{ACME_lower}{the lower bound of the CI for the ACME estimate} -#' \item{ACME_upper}{the upper bound of the CI for the ACME estimate} -#' \item{ADE}{the Average Direct Effect (ADE) estimate} -#' \item{ADE_pval}{the original p-value for the ADE estimate} -#' \item{ADE_lower}{the lower bound of the CI for the ADE estimate} -#' \item{ADE_upper}{the upper bound of the CI for the ADE estimate} -#' \item{Total.coef}{the Total Effect estimate} -#' \item{Total_lower}{the lower bound of the CI for the Total Effect estimate} -#' \item{Total_upper}{the upper bound of the CI for the Total Effect estimate} -#' \item{Total_pval}{the original p-value for the Total Effect estimate} -#' \item{ACME_padj}{the adjusted p-value for the ACME estimate} -#' \item{ADE_padj}{the adjusted p-value for the ADE estimate} -#' \item{Total_padj}{the adjusted p-value for the Total Effect estimate} +#' \item{mediator}{the mediator variable} +#' \item{acme}{the Average Causal Mediation Effect (ACME) estimate} +#' \item{acme_pval}{the original p-value for the ACME estimate} +#' \item{acme_lower}{the lower bound of the CI for the ACME estimate} +#' \item{acme_upper}{the upper bound of the CI for the ACME estimate} +#' \item{ade}{the Average Direct Effect (ADE) estimate} +#' \item{ade_pval}{the original p-value for the ADE estimate} +#' \item{ade_lower}{the lower bound of the CI for the ADE estimate} +#' \item{ade_upper}{the upper bound of the CI for the ADE estimate} +#' \item{total}{the Total Effect estimate} +#' \item{total_lower}{the lower bound of the CI for the Total Effect estimate} +#' \item{total_upper}{the upper bound of the CI for the Total Effect estimate} +#' \item{total_pval}{the original p-value for the Total Effect estimate} +#' \item{acme_padj}{the adjusted p-value for the ACME estimate} +#' \item{ade_padj}{the adjusted p-value for the ADE estimate} +#' \item{total_padj}{the adjusted p-value for the Total Effect estimate} #' } #' #' The original output of \code{\link[mediation:mediate]{mediate}} for each @@ -423,10 +423,10 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), .make_output <- function(models, p.adj.method, add.metadata, sort) { # Combine results res <- do.call(rbind, models) |> as.data.frame() - res[["Mediator"]] <- names(models) + res[["mediator"]] <- names(models) # Select certain data types res <- res |> - select(Mediator, starts_with(c("d.avg", "z.avg", "tau"))) |> + select(mediator, starts_with(c("d.avg", "z.avg", "tau"))) |> select(-ends_with(c("sims"))) # Get columns with scalar values and turn them into vector instead of list cols <- vapply(res, function(col) all(lengths(col) == 1L), logical(1L)) @@ -441,8 +441,8 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), names(lookup) <- limits colnames(res) <- str_replace_all(colnames(res), lookup) # Tidy other names also - lookup <- c("d.avg" = "ACME", "z.avg" = "ADE", "tau" = "Total", - "\\.p" = "_pval", "\\.ci_" = "_") + lookup <- c("d.avg" = "acme", "z.avg" = "ade", "tau" = "total", + "\\.p" = "_pval", "\\.ci_" = "_", "\\.coef" = "") colnames(res) <- str_replace_all(colnames(res), lookup) # Compute adjusted p-values and add them to dataframe diff --git a/man/getMediation.Rd b/man/getMediation.Rd index ec71ae01d..f9648d38a 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -98,22 +98,22 @@ instance with the same \code{data.frame} stored in the metadata as "mediation" or as specified in the \code{name} argument. Its columns include: \describe{ -\item{Mediator}{the mediator variable} -\item{ACME}{the Average Causal Mediation Effect (ACME) estimate} -\item{ACME_pval}{the original p-value for the ACME estimate} -\item{ACME_lower}{the lower bound of the CI for the ACME estimate} -\item{ACME_upper}{the upper bound of the CI for the ACME estimate} -\item{ADE}{the Average Direct Effect (ADE) estimate} -\item{ADE_pval}{the original p-value for the ADE estimate} -\item{ADE_lower}{the lower bound of the CI for the ADE estimate} -\item{ADE_upper}{the upper bound of the CI for the ADE estimate} -\item{Total.coef}{the Total Effect estimate} -\item{Total_lower}{the lower bound of the CI for the Total Effect estimate} -\item{Total_upper}{the upper bound of the CI for the Total Effect estimate} -\item{Total_pval}{the original p-value for the Total Effect estimate} -\item{ACME_padj}{the adjusted p-value for the ACME estimate} -\item{ADE_padj}{the adjusted p-value for the ADE estimate} -\item{Total_padj}{the adjusted p-value for the Total Effect estimate} +\item{mediator}{the mediator variable} +\item{acme}{the Average Causal Mediation Effect (ACME) estimate} +\item{acme_pval}{the original p-value for the ACME estimate} +\item{acme_lower}{the lower bound of the CI for the ACME estimate} +\item{acme_upper}{the upper bound of the CI for the ACME estimate} +\item{ade}{the Average Direct Effect (ADE) estimate} +\item{ade_pval}{the original p-value for the ADE estimate} +\item{ade_lower}{the lower bound of the CI for the ADE estimate} +\item{ade_upper}{the upper bound of the CI for the ADE estimate} +\item{total}{the Total Effect estimate} +\item{total_lower}{the lower bound of the CI for the Total Effect estimate} +\item{total_upper}{the upper bound of the CI for the Total Effect estimate} +\item{total_pval}{the original p-value for the Total Effect estimate} +\item{acme_padj}{the adjusted p-value for the ACME estimate} +\item{ade_padj}{the adjusted p-value for the ADE estimate} +\item{total_padj}{the adjusted p-value for the Total Effect estimate} } The original output of \code{\link[mediation:mediate]{mediate}} for each diff --git a/tests/testthat/test-mediate.R b/tests/testthat/test-mediate.R index 0e5349f9a..1b1a6cd50 100644 --- a/tests/testthat/test-mediate.R +++ b/tests/testthat/test-mediate.R @@ -79,10 +79,10 @@ test_that("getMediation", { treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 1) - expect_named(tse, med_df[["Mediator"]]) + expect_named(tse, med_df[["mediator"]]) - expect_named(med_df, c("Mediator", "ACME", "ACME_pval", "ACME_lower", - "ACME_upper", "ADE", "ADE_pval", "ADE_lower", "ADE_upper", "Total.coef", - "Total_lower", "Total_upper", "Total_pval", "ACME_padj", "ADE_padj", - "Total_padj")) + expect_named(med_df, c("mediator", "acme", "acme_pval", "acme_lower", + "acme_upper", "ade", "ade_pval", "ade_lower", "ade_upper", "total", + "total_lower", "total_upper", "total_pval", "acme_padj", "ade_padj", + "total_padj")) }) From 7be15273ceff9e1f10ad092297d1840fbc04b87a Mon Sep 17 00:00:00 2001 From: Giulio Benedetti Date: Tue, 28 Jan 2025 12:19:51 +0200 Subject: [PATCH 11/12] Fix sorting in getMediation --- R/mediate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mediate.R b/R/mediate.R index ae510c545..59e4e8d82 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -458,7 +458,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), } # Order output dataframe by ACME p-values if( sort ){ - res <- res[order(res[["ACME_padj"]]), ] + res <- res[order(res[["acme_padj"]]), ] } return(res) } \ No newline at end of file From f1d53fa42e568e87c687712704e3fcee9aed355b Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Tue, 28 Jan 2025 15:53:25 +0200 Subject: [PATCH 12/12] up --- DESCRIPTION | 4 +- NAMESPACE | 3 +- R/convertFromBIOM.R | 112 +++++++++++++++++----------------- R/convertFromDADA2.R | 22 +++---- R/mediate.R | 84 ++++++++++++------------- man/getMediation.Rd | 72 +++++++++++----------- man/importBIOM.Rd | 2 +- man/splitOn.Rd | 4 +- tests/testthat/test-mediate.R | 35 +++++------ 9 files changed, 169 insertions(+), 169 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cdeb9fdf1..cc6be3f8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.15.19 +Version: 1.15.20 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", @@ -75,7 +75,6 @@ Imports: IRanges, MASS, MatrixGenerics, - mediation, methods, rbiom, rlang, @@ -94,6 +93,7 @@ Suggests: biomformat, dada2, knitr, + mediation, miaViz, microbiomeDataSets, NMF, diff --git a/NAMESPACE b/NAMESPACE index b89a09558..8b940728d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -404,7 +404,6 @@ importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(dplyr,sym) importFrom(dplyr,tally) -importFrom(mediation,mediate) importFrom(rbiom,unifrac) importFrom(rlang,":=") importFrom(rlang,sym) @@ -431,6 +430,8 @@ importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,terms) importFrom(stringr,str_extract) +importFrom(stringr,str_extract_all) +importFrom(stringr,str_pad) importFrom(stringr,str_replace_all) importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) diff --git a/R/convertFromBIOM.R b/R/convertFromBIOM.R index 9e68d6eee..fe0c61a5c 100644 --- a/R/convertFromBIOM.R +++ b/R/convertFromBIOM.R @@ -1,86 +1,86 @@ #' Convert a \code{TreeSummarizedExperiment} object to/from \sQuote{BIOM} #' results #' -#' For convenience, a few functions are available to convert BIOM, DADA2 and -#' phyloseq objects to +#' For convenience, a few functions are available to convert BIOM, DADA2 and +#' phyloseq objects to #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -#' objects, and +#' objects, and #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} #' objects to phyloseq objects. -#' +#' #' @param prefix.rm \code{Logical scalar}. Should #' taxonomic prefixes be removed? The prefixes is removed only from detected #' taxa columns meaning that \code{rank.from.prefix} should be enabled in the #' most cases. (Default: \code{FALSE}) -#' +#' #' @param removeTaxaPrefixes Deprecated. Use \code{prefix.rm} instead. -#' +#' #' @param rank.from.prefix \code{Logical scalar}. If file does not have #' taxonomic ranks on feature table, should they be scraped from prefixes? #' (Default: \code{FALSE}) -#' +#' #' @param rankFromPrefix Deprecated.Use \code{rank.from.prefix} instead. -#' +#' #' @param artifact.rm \code{Logical scalar}. If file have #' some taxonomic character naming artifacts, should they be removed. #' (default (Default: \code{FALSE}) -#' +#' #' @param remove.artifacts Deprecated. Use \code{artifact.rm} instead. -#' -#' @details +#' +#' @details #' \code{convertFromBIOM} coerces a \code{\link[biomformat:biom-class]{biom}} -#' object to a +#' object to a #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} #' object. -#' +#' #' \code{convertToBIOM} coerces a #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} #' object to a \code{\link[biomformat:biom-class]{biom}} object. -#' +#' #' @return #' \code{convertFromBIOM} returns an object of class #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -#' +#' #' @name importBIOM -#' +#' #' @seealso #' \code{\link[=importQIIME2]{importQIIME2}} #' \code{\link[=importMothur]{importMothur}} #' #' @examples -#' +#' #' # Convert BIOM results to a TreeSE #' # Load biom file #' library(biomformat) #' biom_file <- system.file("extdata", "rich_dense_otu_table.biom", #' package = "biomformat") -#' +#' #' # Make TreeSE from BIOM object #' biom_object <- biomformat::read_biom(biom_file) #' tse <- convertFromBIOM(biom_object) -#' +#' #' # Convert TreeSE object to BIOM #' biom <- convertToBIOM(tse) -#' +#' NULL #' Import BIOM results to \code{TreeSummarizedExperiment} -#' +#' #' @param file BIOM file location -#' +#' #' @param ... additional arguments to be passed to \code{convertFromBIOM} -#' +#' #' @details -#' \code{importBIOM} loads a BIOM file and creates a -#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} +#' \code{importBIOM} loads a BIOM file and creates a +#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} #' object from the BIOM object contained in the loaded file. -#' -#' @return +#' +#' @return #' \code{importBIOM} returns an object of class #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -#' +#' #' @name importBIOM -#' +#' #' @seealso #' \code{\link[=importMetaPhlAn]{importMetaPhlAn}} #' \code{\link[=convertFromPhyloseq]{convertFromPhyloseq}} @@ -95,21 +95,21 @@ NULL #' library(biomformat) #' biom_file <- system.file( #' "extdata", "rich_dense_otu_table.biom", package = "biomformat") -#' +#' #' # Make TreeSE from biom file #' tse <- importBIOM(biom_file) -#' +#' #' # Get taxonomyRanks from prefixes and remove prefixes #' tse <- importBIOM( #' biom_file, rank.from.prefix = TRUE, prefix.rm = TRUE) -#' +#' #' # Load another biom file #' biom_file <- system.file( #' "extdata", "Aggregated_humanization2.biom", package = "mia") -#' +#' #' # Clean artifacts from taxonomic data #' tse <- importBIOM(biom_file, artifact.rm = TRUE) -#' +#' #' @export importBIOM <- function(file, ...) { .require_package("biomformat") @@ -118,15 +118,15 @@ importBIOM <- function(file, ...) { } #' @rdname importBIOM -#' +#' #' @param x object of type \code{\link[biomformat:biom-class]{biom}} #' #' @export #' @importFrom S4Vectors make_zero_col_DFrame DataFrame #' @importFrom dplyr %>% bind_rows convertFromBIOM <- function( - x, prefix.rm = removeTaxaPrefixes, - removeTaxaPrefixes = FALSE, rank.from.prefix = rankFromPrefix, + x, prefix.rm = removeTaxaPrefixes, + removeTaxaPrefixes = FALSE, rank.from.prefix = rankFromPrefix, rankFromPrefix = FALSE, artifact.rm = remove.artifacts, remove.artifacts = FALSE, ...){ # input check @@ -147,7 +147,7 @@ convertFromBIOM <- function( counts <- as(biomformat::biom_data(x), "matrix") sample_data <- biomformat::sample_metadata(x) feature_data <- biomformat::observation_metadata(x) - + # colData is initialized with empty tables with rownames if it is NULL if( is.null(sample_data) ){ sample_data <- S4Vectors::make_zero_col_DFrame(ncol(counts)) @@ -164,15 +164,15 @@ convertFromBIOM <- function( rownames(feature_data) <- rownames(counts) # Otherwise convert it into correct format if it is a list } else if( is(feature_data, "list") ){ - # Feature data is a list of taxa info. Dfs are merged together - # differently than sample metadata since the column names are only - # "Taxonomy". If there is only one taxonomy level, the column name does + # Feature data is a list of taxa info. Dfs are merged together + # differently than sample metadata since the column names are only + # "Taxonomy". If there is only one taxonomy level, the column name does # not get a suffix. # --> bind rows based on the index of column. - + # Get the maximum length of list max_length <- max( lengths(feature_data) ) - # Get the column names from the taxa info that has all the levels that + # Get the column names from the taxa info that has all the levels that # occurs in the data colnames <- names( head( feature_data[ lengths(feature_data) == max_length ], 1)[[1]]) @@ -181,7 +181,7 @@ convertFromBIOM <- function( # have all the levels. E.g., if only Kingdom level is found, all lower # ranks are now NA feature_data <- lapply(feature_data, function(x){ - length(x) <- max_length + length(x) <- max_length return(x) }) # Create a data.frame from the list @@ -203,7 +203,7 @@ convertFromBIOM <- function( feature_data <- cbind(tax_tab, feature_data) feature_data <- as.data.frame(feature_data) } - + # Clean feature_data from possible character artifacts if specified. if( artifact.rm ){ feature_data <- .detect_taxa_artifacts_and_clean(feature_data, ...) @@ -224,7 +224,7 @@ convertFromBIOM <- function( # Adjust row and colnames rownames(counts) <- rownames(feature_data) <- biomformat::rownames(x) colnames(counts) <- rownames(sample_data) <- biomformat::colnames(x) - + # Convert into DataFrame sample_data <- DataFrame(sample_data) feature_data <- DataFrame(feature_data) @@ -245,15 +245,15 @@ convertFromBIOM <- function( } #' @rdname importBIOM -#' +#' #' @param x #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} -#' +#' #' @param assay.type \code{Character scaler}. The name of assay. #' (Default: \code{"counts"}) -#' +#' #' @param ... Additional arguments. Not used currently. -#' +#' #' @export setMethod( "convertToBIOM", signature = c(x = "SummarizedExperiment"), @@ -287,8 +287,8 @@ setMethod( stop("'ignore.col' must be a character value or NULL.", call. = FALSE) } # - # Subset by taking only taxonomy info if user want to remove the pattern - # only from those. (Might be too restricting, e.g., if taxonomy columns are + # Subset by taking only taxonomy info if user want to remove the pattern + # only from those. (Might be too restricting, e.g., if taxonomy columns are # not detected in previous steps. That is way the default is FALSE) ind <- rep(TRUE, ncol(feature_tab)) if( only.taxa.col ){ @@ -334,10 +334,11 @@ setMethod( if( sum(found_rank) == 1 ){ colname <- names(prefixes)[found_rank] } - return(colname) + return(colname) } # Detect and clean non wanted characters from Taxonomy data if needed. +#' @importFrom stringr str_extract_all .detect_taxa_artifacts_and_clean <- function( x, pattern = "auto", ignore.col = "taxonomy_unparsed", ...) { # @@ -360,13 +361,12 @@ setMethod( if( ncol(x) > 0 ){ # Remove artifacts if( pattern == "auto" ){ - .require_package("stringr") # Remove all but these characters pattern <- "[[:alnum:]]|-|_|\\[|\\]|,|;\\||[[:space:]]" x <- lapply(x, function(col){ # Take all specified characters as a matrix where each column # is a character - temp <- stringr::str_extract_all( + temp <- str_extract_all( col, pattern = pattern, simplify = TRUE) # Collapse matrix to strings temp <- apply(temp, 1, paste, collapse = "") @@ -403,7 +403,7 @@ setMethod( # Check if assay contains integers or floats. biom constructor # requires that information since the default value is "int". mat_type <- ifelse(all(assay %% 1 == 0), "int", "float") - + # Create argument list args <- list(data = assay, matrix_element_type = mat_type) # Add rowData and colData only if they contain information diff --git a/R/convertFromDADA2.R b/R/convertFromDADA2.R index e2de656d8..a1da10198 100644 --- a/R/convertFromDADA2.R +++ b/R/convertFromDADA2.R @@ -1,20 +1,20 @@ #' Create a \code{TreeSummarizedExperiment} object from \sQuote{DADA2} results #' -#' @param ... Additional arguments. For \code{convertFromDADA2}, see +#' @param ... Additional arguments. For \code{convertFromDADA2}, see #' \code{mergePairs} function for more details. #' #' @details -#' +#' #' \code{convertFromDADA2} is a wrapper for the #' \code{mergePairs} function from the \code{dada2} package. -#' A count matrix is constructed via -#' \code{makeSequenceTable(mergePairs(...))} and rownames are dynamically -#' created as \code{ASV(N)} with \code{N} from 1 to \code{nrow} of the count +#' A count matrix is constructed via +#' \code{makeSequenceTable(mergePairs(...))} and rownames are dynamically +#' created as \code{ASV(N)} with \code{N} from 1 to \code{nrow} of the count #' tables. The colnames and rownames from the output of \code{makeSequenceTable} -#' are stored as \code{colnames} and in the \code{referenceSeq} slot of the +#' are stored as \code{colnames} and in the \code{referenceSeq} slot of the #' \code{TreeSummarizedExperiment}, respectively. #' -#' @return \code{convertFromDADA2} returns an object of class +#' @return \code{convertFromDADA2} returns an object of class #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} #' #' @importFrom S4Vectors SimpleList @@ -25,7 +25,7 @@ #' @export #' #' @examples -#' +#' #' ### Coerce DADA2 results to a TreeSE object #' if(requireNamespace("dada2")) { #' fnF <- system.file("extdata", "sam1F.fastq.gz", package="dada2") @@ -36,19 +36,17 @@ #' tse <- convertFromDADA2(dadaF, fnF, dadaR, fnR) #' tse #' } +#' @importFrom stringr str_pad convertFromDADA2 <- function(...) { # input checks .require_package("dada2") - .require_package("stringr") # mergers <- dada2::mergePairs(...) seqtab <- dada2::makeSequenceTable(mergers) seqtab <- t(seqtab) # generate row and col names rName <- paste0("ASV", - stringr::str_pad(seq.int(1L,nrow(seqtab)), - nchar(nrow(seqtab)) + 1L, - pad="0")) + str_pad(seq.int(1L,nrow(seqtab)), nchar(nrow(seqtab)) + 1L, pad="0")) cName <- colnames(seqtab) # retrieve count data and reference sequence assays <- S4Vectors::SimpleList(counts = unname(seqtab)) diff --git a/R/mediate.R b/R/mediate.R index 59e4e8d82..8ebf4a687 100644 --- a/R/mediate.R +++ b/R/mediate.R @@ -109,55 +109,55 @@ #' #' # Analyse mediated effect of nationality on BMI via alpha diversity #' # 100 permutations were done to speed up execution, but ~1000 are recommended -#' med_df <- getMediation(tse, -#' outcome = "bmi_group", -#' treatment = "nationality", -#' mediator = "diversity", -#' covariates = c("sex", "age"), -#' treat.value = "Scandinavia", -#' control.value = "CentralEurope", -#' boot = TRUE, sims = 100) -#' -#' # Visualise model statistics for 1st mediator -#' plotMediation(med_df) +#' med_df <- getMediation( +#' tse, +#' outcome = "bmi_group", +#' treatment = "nationality", +#' mediator = "diversity", +#' covariates = c("sex", "age"), +#' treat.value = "Scandinavia", +#' control.value = "CentralEurope", +#' boot = TRUE, sims = 100) +#' +#' # Visualise model statistics +#' plotMediation(med_df) #' #' # Apply clr transformation to counts assay -#' tse <- transformAssay(tse, -#' method = "clr", -#' pseudocount = 1) +#' tse <- transformAssay(tse, method = "clr", pseudocount = 1) #' #' # Analyse mediated effect of nationality on BMI via clr-transformed features #' # 100 permutations were done to speed up execution, but ~1000 are recommended -#' tse <- addMediation(tse, name = "assay_mediation", -#' outcome = "bmi_group", -#' treatment = "nationality", -#' assay.type = "clr", -#' covariates = c("sex", "age"), -#' treat.value = "Scandinavia", -#' control.value = "CentralEurope", -#' boot = TRUE, sims = 100, -#' p.adj.method = "fdr") +#' tse <- addMediation( +#' tse, name = "assay_mediation", +#' outcome = "bmi_group", +#' treatment = "nationality", +#' assay.type = "clr", +#' covariates = c("sex", "age"), +#' treat.value = "Scandinavia", +#' control.value = "CentralEurope", +#' boot = TRUE, sims = 100, +#' p.adj.method = "fdr") #' #' # Show results for first 5 mediators #' head(metadata(tse)$assay_mediation, 5) #' #' # Perform ordination -#' tse <- runMDS(tse, name = "MDS", -#' method = "euclidean", -#' assay.type = "clr", -#' ncomponents = 3) +#' tse <- runMDS( +#' tse, name = "MDS", method = "euclidean", +#' assay.type = "clr", ncomponents = 3) #' #' # Analyse mediated effect of nationality on BMI via NMDS components #' # 100 permutations were done to speed up execution, but ~1000 are recommended -#' tse <- addMediation(tse, name = "reddim_mediation", -#' outcome = "bmi_group", -#' treatment = "nationality", -#' dimred = "MDS", -#' covariates = c("sex", "age"), -#' treat.value = "Scandinavia", -#' control.value = "CentralEurope", -#' boot = TRUE, sims = 100, -#' p.adj.method = "fdr") +#' tse <- addMediation( +#' tse, name = "reddim_mediation", +#' outcome = "bmi_group", +#' treatment = "nationality", +#' dimred = "MDS", +#' covariates = c("sex", "age"), +#' treat.value = "Scandinavia", +#' control.value = "CentralEurope", +#' boot = TRUE, sims = 100, +#' p.adj.method = "fdr") #' #' # Show results for first 5 mediators #' head(metadata(tse)$reddim_mediation, 5) @@ -362,11 +362,10 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), } # Run mediation analysis -#' @importFrom mediation mediate #' @importFrom stats lm formula glm .run_mediate <- function(x, outcome, treatment, mediator = NULL, mat = NULL, family = gaussian(), covariates = NULL, ...) { - + .require_package("mediation") # Create initial dataframe with outcome and treatment variables df <- data.frame( Outcome = colData(x)[[outcome]], Treatment = colData(x)[[treatment]]) @@ -407,7 +406,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), list(formula = formula(relation_dv), family = family, data = df) ) # Run mediation analysis - med_out <- mediate( + med_out <- mediation::mediate( fit_m, fit_dv, treat = "Treatment", mediator = "Mediator", covariates = covariates, ... @@ -420,6 +419,7 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), #' @importFrom dplyr select #' @importFrom tidyr starts_with ends_with unnest_wider #' @importFrom stringr str_extract str_replace_all +#' @importFrom stats p.adjust .make_output <- function(models, p.adj.method, add.metadata, sort) { # Combine results res <- do.call(rbind, models) |> as.data.frame() @@ -432,9 +432,9 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), cols <- vapply(res, function(col) all(lengths(col) == 1L), logical(1L)) res[, cols] <- lapply(res[, cols], unlist) # Unnest confidence interval columns - res <- res |> unnest_wider(tidyr::ends_with(".ci"), names_sep = "_") + res <- res |> unnest_wider(ends_with(".ci"), names_sep = "_") # Replace confidence interval values with "lower" and "upper" - limits <- unique(str_extract(colnames(res), "([0-9.]+)%")) + limits <- str_extract(colnames(res), "([0-9.]+)%") |> unique() limits <- limits[!is.na(limits)] limits <- limits[order(as.numeric(gsub("%", "", limits)))] lookup <- c("lower", "upper") @@ -461,4 +461,4 @@ setMethod("getMediation", signature = c(x = "SummarizedExperiment"), res <- res[order(res[["acme_padj"]]), ] } return(res) -} \ No newline at end of file +} diff --git a/man/getMediation.Rd b/man/getMediation.Rd index f9648d38a..2f69d7d3f 100644 --- a/man/getMediation.Rd +++ b/man/getMediation.Rd @@ -152,55 +152,55 @@ tse$bmi_group <- as.numeric(tse$bmi_group) # Analyse mediated effect of nationality on BMI via alpha diversity # 100 permutations were done to speed up execution, but ~1000 are recommended -med_df <- getMediation(tse, - outcome = "bmi_group", - treatment = "nationality", - mediator = "diversity", - covariates = c("sex", "age"), - treat.value = "Scandinavia", - control.value = "CentralEurope", - boot = TRUE, sims = 100) - - # Visualise model statistics for 1st mediator - plotMediation(med_df) +med_df <- getMediation( + tse, + outcome = "bmi_group", + treatment = "nationality", + mediator = "diversity", + covariates = c("sex", "age"), + treat.value = "Scandinavia", + control.value = "CentralEurope", + boot = TRUE, sims = 100) + +# Visualise model statistics +plotMediation(med_df) # Apply clr transformation to counts assay -tse <- transformAssay(tse, - method = "clr", - pseudocount = 1) +tse <- transformAssay(tse, method = "clr", pseudocount = 1) # Analyse mediated effect of nationality on BMI via clr-transformed features # 100 permutations were done to speed up execution, but ~1000 are recommended -tse <- addMediation(tse, name = "assay_mediation", - outcome = "bmi_group", - treatment = "nationality", - assay.type = "clr", - covariates = c("sex", "age"), - treat.value = "Scandinavia", - control.value = "CentralEurope", - boot = TRUE, sims = 100, - p.adj.method = "fdr") +tse <- addMediation( + tse, name = "assay_mediation", + outcome = "bmi_group", + treatment = "nationality", + assay.type = "clr", + covariates = c("sex", "age"), + treat.value = "Scandinavia", + control.value = "CentralEurope", + boot = TRUE, sims = 100, + p.adj.method = "fdr") # Show results for first 5 mediators head(metadata(tse)$assay_mediation, 5) # Perform ordination -tse <- runMDS(tse, name = "MDS", - method = "euclidean", - assay.type = "clr", - ncomponents = 3) +tse <- runMDS( + tse, name = "MDS", method = "euclidean", + assay.type = "clr", ncomponents = 3) # Analyse mediated effect of nationality on BMI via NMDS components # 100 permutations were done to speed up execution, but ~1000 are recommended -tse <- addMediation(tse, name = "reddim_mediation", - outcome = "bmi_group", - treatment = "nationality", - dimred = "MDS", - covariates = c("sex", "age"), - treat.value = "Scandinavia", - control.value = "CentralEurope", - boot = TRUE, sims = 100, - p.adj.method = "fdr") +tse <- addMediation( + tse, name = "reddim_mediation", + outcome = "bmi_group", + treatment = "nationality", + dimred = "MDS", + covariates = c("sex", "age"), + treat.value = "Scandinavia", + control.value = "CentralEurope", + boot = TRUE, sims = 100, + p.adj.method = "fdr") # Show results for first 5 mediators head(metadata(tse)$reddim_mediation, 5) diff --git a/man/importBIOM.Rd b/man/importBIOM.Rd index ef2cf2bc4..563768409 100644 --- a/man/importBIOM.Rd +++ b/man/importBIOM.Rd @@ -116,7 +116,7 @@ biom_file <- system.file( # Clean artifacts from taxonomic data tse <- importBIOM(biom_file, artifact.rm = TRUE) - + } \seealso{ \code{\link[=importQIIME2]{importQIIME2}} diff --git a/man/splitOn.Rd b/man/splitOn.Rd index 7b7437aab..4f580e176 100644 --- a/man/splitOn.Rd +++ b/man/splitOn.Rd @@ -105,10 +105,10 @@ group. \examples{ data(GlobalPatterns) tse <- GlobalPatterns -# Split data based on SampleType. +# Split data based on SampleType. se_list <- splitOn(tse, group = "SampleType") -# List of SE objects is returned. +# List of SE objects is returned. se_list # Create arbitrary groups diff --git a/tests/testthat/test-mediate.R b/tests/testthat/test-mediate.R index 1b1a6cd50..6962b6a18 100644 --- a/tests/testthat/test-mediate.R +++ b/tests/testthat/test-mediate.R @@ -1,86 +1,87 @@ test_that("getMediation", { - + skip_if_not(require("miaTime", quietly = TRUE)) data("hitchip1006", package = "miaTime") tse <- hitchip1006 - + tse <- agglomerateByRank(tse, rank = "Phylum") tse$bmi_group <- as.numeric(tse$bmi_group) - + ### Batch 1: check errors when missing or wrong arguments ### expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", mediator = "diversity", assay.type = "counts"), "The arguments mediator, assay.type and dimred are mutually exclusive, but 2 were provided." ) - + expect_error( getMediation(tse, outcome = "wrong_name", treatment = "nationality", mediator = "diversity"), "wrong_name not found in colData(x).", fixed = TRUE ) - + expect_error( getMediation(tse, outcome = "bmi_group", treatment = "wrong_name", mediator = "diversity"), "wrong_name not found in colData(x).", fixed = TRUE ) - + expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", mediator = "wrong_name"), "wrong_name not found in colData(x).", fixed = TRUE ) - + expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", assay.type = "wrong_name"), "'assay.type' must be a valid name of assays(x)", fixed = TRUE ) - + expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", dimred = "wrong_name"), "wrong_name not found in reducedDims(x).", fixed = TRUE ) - + expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", mediator = "diversity", boot = TRUE, sims = 1), "Too many treatment levels. Consider specifing a treat.value and a control.value" ) - + expect_error( getMediation(tse, outcome = "bmi_group", treatment = "nationality", mediator = "diversity", boot = TRUE, sims = 1, treat.value = "wrong_value", control.value = "wrong_value"), "treat.value and/or control.value not found in the levels of the treatment variable." ) - + + skip_if_not_installed("mediation") ### Batch 2: check equality between base function and wrapper ### set.seed(123) med_df <- getMediation(tse, outcome = "bmi_group", treatment = "nationality", mediator = "diversity", treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 1, add.metadata = TRUE) - + df <- data.frame(Outcome = tse$bmi_group, Treatment = tse$nationality, Mediator = tse$diversity) df <- na.omit(df) df <- df[df$Treatment %in% c("Scandinavia", "CentralEurope"), ] fit_m <- lm(Mediator ~ Treatment, data = df) fit_dv <- glm(Outcome ~ Treatment + Mediator, data = df) - + set.seed(123) med_out <- mediate(fit_m, fit_dv, treat = "Treatment", mediator = "Mediator", treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 1) - + expect_equal(attr(med_df, "model_metadata")[[1]][["d.avg"]], med_out$d.avg) expect_equal(attr(med_df, "model_metadata")[[1]][["d.avg.p"]], med_out$d.avg.p) expect_equal(attr(med_df, "model_metadata")[[1]][["z.avg"]], med_out$z.avg) expect_equal(attr(med_df, "model_metadata")[[1]][["z.avg.p"]], med_out$z.avg.p) - + ### Batch 3: check output format and dimensionality with respect to SE ### med_df <- getMediation(tse, outcome = "bmi_group", treatment = "nationality", assay.type = "counts", treat.value = "Scandinavia", control.value = "CentralEurope", boot = TRUE, sims = 1) - + expect_named(tse, med_df[["mediator"]]) - + expect_named(med_df, c("mediator", "acme", "acme_pval", "acme_lower", "acme_upper", "ade", "ade_pval", "ade_lower", "ade_upper", "total", "total_lower", "total_upper", "total_pval", "acme_padj", "ade_padj",