diff --git a/R/DoubletFinder.R b/R/DoubletFinder.R index 09adb554..fa2e2f3d 100644 --- a/R/DoubletFinder.R +++ b/R/DoubletFinder.R @@ -27,8 +27,10 @@ #' \code{DoubletFinder_classification} updated in \code{cellMeta} slot #' @export #' @examples -#' pbmc <- runDoubletFinder(pbmc) -#' cellMeta(pbmc) +#' if (requireNamespace("DoubletFinder", quietly = TRUE)) { +#' pbmc <- runDoubletFinder(pbmc) +#' print(cellMeta(pbmc)) +#' } runDoubletFinder <- function( object, useDatasets = NULL, diff --git a/R/GSEA.R b/R/GSEA.R index 17c11656..04c0bcc1 100644 --- a/R/GSEA.R +++ b/R/GSEA.R @@ -16,7 +16,9 @@ #' @return A list of matrices with GSEA analysis for each factor #' @export #' @examples +#' \donttest{ #' runGSEA(pbmcPlot) +#' } runGSEA <- function( object, genesets = NULL, diff --git a/R/clustering.R b/R/clustering.R index 2a3b6c9e..8b36f690 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -1,13 +1,14 @@ #' SNN Graph Based Community Detection -#' @description After quantile normalization, users can additionally run the -#' Leiden or Louvain algorithm for community detection, which is widely used in +#' @description +#' After quantile normalization, users can additionally run the Leiden or +#' Louvain algorithm for community detection, which is widely used in #' single-cell analysis and excels at merging small clusters into broad cell #' classes. #' #' While using quantile normalized factor loadings (result from #' \code{\link{quantileNorm}}) is recommended, this function looks for -#' unnormalized factor loadings (result from \code{\link{optimizeALS}} or -#' \code{\link{online_iNMF}}) when the former is not available. +#' unnormalized factor loadings (result from \code{\link{runIntegration}}) when +#' the former is not available. #' @param object A \linkS4class{liger} object. Should have valid factorization #' result available. #' @param nNeighbors Integer, the maximum number of nearest neighbors to @@ -28,6 +29,9 @@ #' Default \code{5}. #' @param method Community detection algorithm to use. Choose from #' \code{"leiden"} or \code{"louvain"}. Default \code{"leiden"}. +#' @param useRaw Whether to use un-aligned cell factor loadings (\eqn{H} +#' matrices). Default \code{NULL} search for quantile-normalized loadings first +#' and un-aligned loadings then. #' @param useDims Indices of factors to use for clustering. Default \code{NULL} #' uses all available factors. #' @param groupSingletons Whether to group single cells that make up their own @@ -58,6 +62,7 @@ runCluster <- function( nRandomStarts = 10, nIterations = 5, method = c("leiden", "louvain"), + useRaw = NULL, useDims = NULL, groupSingletons = TRUE, clusterName = paste0(method, "_cluster"), @@ -69,21 +74,16 @@ runCluster <- function( leiden = recordCommand(object, dependencies = c("RANN", "leidenAlg")), louvain = recordCommand(object, dependencies = c("RANN")) ) + Hsearch <- searchH(object, useRaw) + H <- Hsearch$H + useRaw <- Hsearch$useRaw + type <- ifelse(useRaw, " unnormalized ", " quantile normalized ") - H.norm <- getMatrix(object, "H.norm") - if (is.null(H.norm)) { - type <- " unnormalized " - H.norm <- Reduce(cbind, getMatrix(object, "H")) - } else type <- " quantile normalized " - if (is.null(H.norm)) - stop("No factor loading ('H.norm' or 'H') found in `object`.") - if (type == " unnormalized ") H.norm <- t(H.norm) - - if (!is.null(useDims)) H.norm <- H.norm[, useDims] + if (!is.null(useDims)) H <- H[, useDims, drop = FALSE] if (isTRUE(verbose)) .log(method, " clustering on", type, "cell factor loadings...") - knn <- RANN::nn2(H.norm, k = nNeighbors, eps = eps) + knn <- RANN::nn2(H, k = nNeighbors, eps = eps) snn <- ComputeSNN(knn$nn.idx, prune = prune) if (!is.null(seed)) set.seed(seed) if (method == "leiden") { @@ -93,7 +93,7 @@ runCluster <- function( edge_weights <- snnSummary[,3] clusts <- leidenAlg::find_partition_with_rep_rcpp( edgelist = edgelist, edgelist_length = edgelist_length, - num_vertices = nrow(H.norm), direction = FALSE, + num_vertices = nrow(H), direction = FALSE, edge_weights = edge_weights, resolution = resolution, niter = nIterations, nrep = nRandomStarts ) diff --git a/R/data.R b/R/data.R index 242f7fd9..6269e1f2 100644 --- a/R/data.R +++ b/R/data.R @@ -7,10 +7,10 @@ #' liger object of PBMC subsample data with plotting information available #' @description This data was generated from data \code{"pbmc"} with default -#' parameter integration pipeline, with setting \code{k = 20, maxIter = 10} -#' for \code{\link{optimizeALS}}. UMAP was generated with \code{minDist = 0.5}. -#' Only the 2nd and 3rd factors, the normalized expression of the top 50 -#' variable genes, and the clustering label were kept for visualization example. +#' parameter integration pipeline: normalize, selectGenes, scaleNotCenter, +#' runINMF, runCluster, runUMAP. To minimize the object size distributed with +#' the package, rawData and scaleData were removed. Genes are downsampled to +#' the top 50 variable genes, for smaller normData and \eqn{W} matrix. #' @format \linkS4class{liger} object with two datasets named by "ctrl" and #' "stim". #' @source https://www.nature.com/articles/nbt.4042 diff --git a/R/embedding.R b/R/embedding.R index b0ac1f55..71aeb639 100644 --- a/R/embedding.R +++ b/R/embedding.R @@ -1,12 +1,12 @@ #' Perform UMAP Dimensionality Reduction -#' @description Run UMAP on the quantile normalized cell factors (result from -#' \code{\link{quantileNorm}}), or unnormalized cell -#' factors (result from \code{\link{optimizeALS}} or \code{\link{online_iNMF}})) -#' to generate a 2D embedding for visualization (or general dimensionality -#' reduction). Has option to run on subset of factors. It is generally -#' recommended to use this method for dimensionality reduction with extremely -#' large datasets. The underlying UMAP calculation imports uwot -#' \code{\link[uwot]{umap}}. +#' @description +#' Run UMAP on the quantile normalized cell factors (result from +#' \code{\link{quantileNorm}}), or unnormalized cell factors (result from +#' \code{\link{runIntegration}})) to generate a 2D embedding for visualization +#' (or general dimensionality reduction). Has option to run on subset of +#' factors. It is generally recommended to use this method for dimensionality +#' reduction with extremely large datasets. The underlying UMAP calculation +#' imports uwot \code{\link[uwot]{umap}}. #' @details For \code{nNeighbors}, larger values will result in more global #' structure being preserved at the loss of detailed local structure. In general #' this parameter should often be in the range 5 to 50, with a choice of 10 to @@ -18,7 +18,8 @@ #' 0.001 to 0.5, with 0.1 being a reasonable default. #' @param object \linkS4class{liger} object with factorization results. #' @param useRaw Whether to use un-aligned cell factor loadings (\eqn{H} -#' matrices). Default \code{FALSE}. +#' matrices). Default \code{NULL} search for quantile-normalized loadings first +#' and un-aligned loadings then. #' @param useDims Index of factors to use for computing UMAP embedding. Default #' \code{NULL} uses all factors. #' @param nDims Number of dimensions to reduce to. Default \code{2}. @@ -44,7 +45,7 @@ #' pbmc <- runUMAP(pbmcPlot) runUMAP <- function( object, - useRaw = FALSE, + useRaw = NULL, useDims = NULL, nDims = 2, distance = c("cosine", "euclidean", "manhattan", "hamming"), @@ -67,16 +68,13 @@ runUMAP <- function( distance <- match.arg(distance) object <- recordCommand(object, dependencies = "uwot") set.seed(seed) - if (isTRUE(useRaw)) { - type <- " unnormalized " - H <- t(Reduce(cbind, getMatrix(object, "H"))) - } else { - type <- " quantile normalized " - H <- getMatrix(object, "H.norm") - } + Hsearch <- searchH(object, useRaw) + H <- Hsearch$H + useRaw <- Hsearch$useRaw + type <- ifelse(useRaw, " unnormalized ", " quantile normalized ") if (isTRUE(verbose)) .log("Generating UMAP on", type, "cell factor loadings...") - if (!is.null(useDims)) H <- H[, useDims] + if (!is.null(useDims)) H <- H[, useDims, drop = FALSE] umap <- uwot::umap(H, n_components = as.integer(nDims), metric = distance, @@ -87,13 +85,14 @@ runUMAP <- function( } #' Perform t-SNE dimensionality reduction -#' @description Runs t-SNE on the quantile normalized cell factors (result from +#' @description +#' Runs t-SNE on the quantile normalized cell factors (result from #' \code{\link{quantileNorm}}), or unnormalized cell factors (result from -#' \code{\link{optimizeALS}} or \code{\link{online_iNMF}})) to generate a 2D -#' embedding for visualization. By default \code{\link[Rtsne]{Rtsne}} -#' (Barnes-Hut implementation of t-SNE) method is invoked, while alternative -#' "fftRtsne" method (FFT-accelerated Interpolation-based t-SNE, using Kluger -#' Lab implementation) is also supported. +#' \code{\link{runIntegration}})) to generate a 2D embedding for visualization. +#' By default \code{\link[Rtsne]{Rtsne}} (Barnes-Hut implementation of t-SNE) +#' method is invoked, while alternative "fftRtsne" method (FFT-accelerated +#' Interpolation-based t-SNE, using Kluger Lab implementation) is also +#' supported. #' #' In order to run fftRtsne (recommended for large datasets), FIt-SNE must be #' installed as instructed in detailed @@ -104,7 +103,8 @@ runUMAP <- function( #' \href{https://github.com/welch-lab/liger#readme}{README}. #' @param object \linkS4class{liger} object with factorization results. #' @param useRaw Whether to use un-aligned cell factor loadings (\eqn{H} -#' matrices). Default \code{FALSE}. +#' matrices). Default \code{NULL} search for quantile-normalized loadings first +#' and un-aligned loadings then. #' @param useDims Index of factors to use for computing UMAP embedding. Default #' \code{NULL} uses all factors. #' @param nDims Number of dimensions to reduce to. Default \code{2}. @@ -134,7 +134,7 @@ runUMAP <- function( #' pbmc <- runTSNE(pbmcPlot) runTSNE <- function( object, - useRaw = FALSE, + useRaw = NULL, useDims = NULL, nDims = 2, usePCA = FALSE, @@ -158,20 +158,17 @@ runTSNE <- function( rand.seed = "seed")) method <- match.arg(method) object <- recordCommand(object, dependencies = "Rtsne") - if (isTRUE(useRaw)) { - type <- " unnormalized " - data.use <- t(Reduce(cbind, getMatrix(object, "H"))) - } else { - type <- " quantile normalized " - data.use <- getMatrix(object, "H.norm") - } + Hsearch <- searchH(object, useRaw) + H <- Hsearch$H + useRaw <- Hsearch$useRaw + type <- ifelse(useRaw, " unnormalized ", " quantile normalized ") if (isTRUE(verbose)) .log("Generating TSNE (", method, ") on", type, "cell factor loadings...") - if (!is.null(useDims)) data.use <- data.use[, useDims] + if (!is.null(useDims)) H <- H[, useDims, drop = FALSE] if (method == "Rtsne") { set.seed(seed) - tsne <- Rtsne::Rtsne(data.use, + tsne <- Rtsne::Rtsne(H, dims = nDims, pca = usePCA, check_duplicates = FALSE, @@ -179,7 +176,7 @@ runTSNE <- function( perplexity = perplexity) tsne <- tsne$Y } else if (method == "fftRtsne") { - tsne <- .fftRtsne(data.use, + tsne <- .fftRtsne(H, dims = nDims, rand_seed = seed, fast_tsne_path = fitsnePath, diff --git a/R/util.R b/R/util.R index 3d030aa8..7eb82c43 100644 --- a/R/util.R +++ b/R/util.R @@ -545,3 +545,42 @@ splitRmMiss <- function(x, y) { names(matList) <- levels(y) return(matList) } + +searchH <- function(object, useRaw = NULL) { + if (is.null(useRaw)) { + # By default, look for quantile-normed H + H <- getMatrix(object, "H.norm") + if (is.null(H)) { + # If not found, look for raw H + Ht <- Reduce(cbind, getMatrix(object, "H")) + if (is.null(Ht)) { + stop("No cell factor loading available. ", + "Please run `runIntegration()` and `quantileNorm()` first.") + } else { + useRaw <- TRUE + H <- t(Ht) + } + } else { + useRaw <- FALSE + } + } else { + if (isTRUE(useRaw)) { + Ht <- Reduce(cbind, getMatrix(object, "H")) + if (is.null(Ht)) { + stop("Raw cell factor loading requested but not found. ", + "Please run `runIntegration()`.") + } else { + H <- t(Ht) + } + } else { + H <- getMatrix(object, "H.norm") + if (is.null(H)) { + stop("Quantile-normalized cell factor loading requested but ", + "not found. Please run `quantileNorm()` after ", + "`runIntegration()`.") + } + useRaw <- FALSE + } + } + return(list(H = H, useRaw = useRaw)) +} diff --git a/data/pbmcPlot.rda b/data/pbmcPlot.rda index 5438a0eb..2cf8de18 100644 Binary files a/data/pbmcPlot.rda and b/data/pbmcPlot.rda differ diff --git a/man/pbmcPlot.Rd b/man/pbmcPlot.Rd index 9ecb23c1..d5c55ffa 100644 --- a/man/pbmcPlot.Rd +++ b/man/pbmcPlot.Rd @@ -16,10 +16,10 @@ pbmcPlot } \description{ This data was generated from data \code{"pbmc"} with default -parameter integration pipeline, with setting \code{k = 20, maxIter = 10} -for \code{\link{optimizeALS}}. UMAP was generated with \code{minDist = 0.5}. -Only the 2nd and 3rd factors, the normalized expression of the top 50 -variable genes, and the clustering label were kept for visualization example. +parameter integration pipeline: normalize, selectGenes, scaleNotCenter, +runINMF, runCluster, runUMAP. To minimize the object size distributed with +the package, rawData and scaleData were removed. Genes are downsampled to +the top 50 variable genes, for smaller normData and \eqn{W} matrix. } \references{ Hyun Min Kang and et. al., Nature Biotechnology, 2018 diff --git a/man/runCluster.Rd b/man/runCluster.Rd index 70561006..5d3ec01b 100644 --- a/man/runCluster.Rd +++ b/man/runCluster.Rd @@ -13,6 +13,7 @@ runCluster( nRandomStarts = 10, nIterations = 5, method = c("leiden", "louvain"), + useRaw = NULL, useDims = NULL, groupSingletons = TRUE, clusterName = paste0(method, "_cluster"), @@ -49,6 +50,10 @@ Default \code{5}.} \item{method}{Community detection algorithm to use. Choose from \code{"leiden"} or \code{"louvain"}. Default \code{"leiden"}.} +\item{useRaw}{Whether to use un-aligned cell factor loadings (\eqn{H} +matrices). Default \code{NULL} search for quantile-normalized loadings first +and un-aligned loadings then.} + \item{useDims}{Indices of factors to use for clustering. Default \code{NULL} uses all available factors.} @@ -71,15 +76,15 @@ in \code{cellMeta} slot of \code{object}. Default \code{"leiden_cluster"} and with \code{object[[clusterName]]} } \description{ -After quantile normalization, users can additionally run the -Leiden or Louvain algorithm for community detection, which is widely used in +After quantile normalization, users can additionally run the Leiden or +Louvain algorithm for community detection, which is widely used in single-cell analysis and excels at merging small clusters into broad cell classes. While using quantile normalized factor loadings (result from \code{\link{quantileNorm}}) is recommended, this function looks for -unnormalized factor loadings (result from \code{\link{optimizeALS}} or -\code{\link{online_iNMF}}) when the former is not available. +unnormalized factor loadings (result from \code{\link{runIntegration}}) when +the former is not available. } \examples{ pbmcPlot <- runCluster(pbmcPlot) diff --git a/man/runDoubletFinder.Rd b/man/runDoubletFinder.Rd index d692f51d..d95ae1a8 100644 --- a/man/runDoubletFinder.Rd +++ b/man/runDoubletFinder.Rd @@ -53,6 +53,8 @@ single-sample Seurat object with \code{CreateSeuratObject(rawData(object, "datasetName"))}. } \examples{ -pbmc <- runDoubletFinder(pbmc) -cellMeta(pbmc) +if (requireNamespace("DoubletFinder", quietly = TRUE)) { + pbmc <- runDoubletFinder(pbmc) + print(cellMeta(pbmc)) +} } diff --git a/man/runGSEA.Rd b/man/runGSEA.Rd index ef2ca983..520dac70 100644 --- a/man/runGSEA.Rd +++ b/man/runGSEA.Rd @@ -43,5 +43,7 @@ Identify the biological pathways (gene sets from Reactome) that each metagene (factor) might belongs to. } \examples{ +\donttest{ runGSEA(pbmcPlot) } +} diff --git a/man/runTSNE.Rd b/man/runTSNE.Rd index c58afc38..b78297d8 100644 --- a/man/runTSNE.Rd +++ b/man/runTSNE.Rd @@ -6,7 +6,7 @@ \usage{ runTSNE( object, - useRaw = FALSE, + useRaw = NULL, useDims = NULL, nDims = 2, usePCA = FALSE, @@ -29,7 +29,8 @@ runTSNE( \item{object}{\linkS4class{liger} object with factorization results.} \item{useRaw}{Whether to use un-aligned cell factor loadings (\eqn{H} -matrices). Default \code{FALSE}.} +matrices). Default \code{NULL} search for quantile-normalized loadings first +and un-aligned loadings then.} \item{useDims}{Index of factors to use for computing UMAP embedding. Default \code{NULL} uses all factors.} @@ -70,11 +71,11 @@ The \code{object} where a \code{"TSNE"} variable is updated in the \description{ Runs t-SNE on the quantile normalized cell factors (result from \code{\link{quantileNorm}}), or unnormalized cell factors (result from -\code{\link{optimizeALS}} or \code{\link{online_iNMF}})) to generate a 2D -embedding for visualization. By default \code{\link[Rtsne]{Rtsne}} -(Barnes-Hut implementation of t-SNE) method is invoked, while alternative -"fftRtsne" method (FFT-accelerated Interpolation-based t-SNE, using Kluger -Lab implementation) is also supported. +\code{\link{runIntegration}})) to generate a 2D embedding for visualization. +By default \code{\link[Rtsne]{Rtsne}} (Barnes-Hut implementation of t-SNE) +method is invoked, while alternative "fftRtsne" method (FFT-accelerated +Interpolation-based t-SNE, using Kluger Lab implementation) is also +supported. In order to run fftRtsne (recommended for large datasets), FIt-SNE must be installed as instructed in detailed diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index a1390f2d..17bf58bf 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -6,7 +6,7 @@ \usage{ runUMAP( object, - useRaw = FALSE, + useRaw = NULL, useDims = NULL, nDims = 2, distance = c("cosine", "euclidean", "manhattan", "hamming"), @@ -27,7 +27,8 @@ runUMAP( \item{object}{\linkS4class{liger} object with factorization results.} \item{useRaw}{Whether to use un-aligned cell factor loadings (\eqn{H} -matrices). Default \code{FALSE}.} +matrices). Default \code{NULL} search for quantile-normalized loadings first +and un-aligned loadings then.} \item{useDims}{Index of factors to use for computing UMAP embedding. Default \code{NULL} uses all factors.} @@ -61,13 +62,12 @@ The \code{object} where a \code{"UMAP"} variable is updated in the } \description{ Run UMAP on the quantile normalized cell factors (result from -\code{\link{quantileNorm}}), or unnormalized cell -factors (result from \code{\link{optimizeALS}} or \code{\link{online_iNMF}})) -to generate a 2D embedding for visualization (or general dimensionality -reduction). Has option to run on subset of factors. It is generally -recommended to use this method for dimensionality reduction with extremely -large datasets. The underlying UMAP calculation imports uwot -\code{\link[uwot]{umap}}. +\code{\link{quantileNorm}}), or unnormalized cell factors (result from +\code{\link{runIntegration}})) to generate a 2D embedding for visualization +(or general dimensionality reduction). Has option to run on subset of +factors. It is generally recommended to use this method for dimensionality +reduction with extremely large datasets. The underlying UMAP calculation +imports uwot \code{\link[uwot]{umap}}. } \details{ For \code{nNeighbors}, larger values will result in more global diff --git a/tests/testthat/test_downstream.R b/tests/testthat/test_downstream.R index c714ddcf..e6754e60 100644 --- a/tests/testthat/test_downstream.R +++ b/tests/testthat/test_downstream.R @@ -65,7 +65,7 @@ context("Clustering") test_that("clustering", { skip_if_not(has_RcppPlanc) pbmc <- process(pbmc, f = FALSE, q = FALSE) - expect_error(runCluster(pbmc), "No factor loading ") + expect_error(runCluster(pbmc), "No cell factor loading available") pbmc <- runOnlineINMF(pbmc, k = 20, minibatchSize = 100) expect_message(runCluster(pbmc, nRandomStarts = 1),