Add one more visualization function, fixes towards cran check

welch-lab · Oct 29, 2024 · 176c6ab · 176c6ab
1 parent 704a92d
commit 176c6ab
Show file tree

Hide file tree

Showing 37 changed files with 482 additions and 129 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -148,6 +148,7 @@ export(plotCellViolin)
 export(plotClusterDimRed)
 export(plotClusterFactorDot)
 export(plotClusterGeneDot)
+export(plotClusterGeneViolin)
 export(plotClusterProportions)
 export(plotDatasetDimRed)
 export(plotDensityDimRed)

diff --git a/NEWS.md b/NEWS.md
@@ -13,19 +13,20 @@
   - Pseudo-bulk should be easy because we are just aggregating cells.
   - Wilcoxon might be a bit harder because ranks are calculated per gene but the H5 sparse data is column-major. Might need to find a fast on-disk transposition method, which would also enhance RcppPlanc performance when running ANLS on H5 data.
 
-## rliger 2.0.99
+## rliger 2.1.0
 
 - Added `centroidAlign()` for new cell factor loading alignment method
 - Added `plotProportionBox()` for visualizing compositional analysis
+- Added `plotClusterGeneViolin()` for visualizing gene expression in clusters
 - Added `plotBarcodeRank()` for basic QC visualization
 - Added `plotPairwiseDEGHeatmap()` for visualizing pairwise DEG results
 - Added `plotGODot()` for visualizing GO enrichment results
 - Added `calcNMI()` for evaluating clustering results against ground truth
-- Added `ligerToH5AD()` allowing reticulate/Python free export of liger object to H5AD format. This might not be releasable due to the need of calling non-exported functions from *hdf5r* library.
+- Added `ligerToH5AD()` allowing reticulate/Python-free export of liger object to H5AD format. This is provided as extension source code (i.e. not loaded with `library(rliger)`).
 - Added organism support in `runGeneralQC()` and refined hemoglobin gene matching regex pattern.
 - Optimized DE test memory usage scalability for both pseudo-bulk method and wilcoxon test
 - Optimized `plotProportionPie()` by adding argument `circleColors`
-- Optimized `plotVolcano()` text annotation positioning
+- Optimized `plotVolcano()` text annotation positioning and gene highlighting logic.
 - Optimized visualization function additional argument documentation
 - Changed `runMarkerDEG()` and `runPairwiseDEG()` default method from `"wilcoxon"` to `"pseudoBulk"`
 - Fixed `runMarkerDEG(method = "pseudobulk")` bug in assigning pseudo-replicates, and optimized error/warning signaling.

diff --git a/R/DEG_marker.R b/R/DEG_marker.R
@@ -1080,15 +1080,11 @@ computePval <- function(ustat, ties, N, n1n2) {
 #' @param column_title Title on the column. Default \code{NULL}.
 #' @inheritDotParams plotGeneHeatmap cellAnnotation
 #' @inheritDotParams .plotHeatmap transpose showCellLabel showCellLegend showFeatureLabel cellAnnColList featureAnnColList scale trim baseSize cellTextSize featureTextSize cellTitleSize featureTitleSize legendTextSize legendTitleSize viridisOption viridisDirection RColorBrewerOption
-#' @return A \linkS4class{HeatmapList} object.
+#' @return A \link[ComplexHeatmap]{HeatmapList-class} object.
 #' @examples
 #' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster
-#' markerTable <- runMarkerDEG(
-#'     pbmc,
-#'     minCellPerRep = 5
-#' )
 #' pbmc <- normalize(pbmc)
-#' plotMarkerHeatmap(pbmc, markerTable)
+#' plotMarkerHeatmap(pbmc, deg.marker)
 plotMarkerHeatmap <- function(
         object,
         result,
@@ -1183,18 +1179,11 @@ plotMarkerHeatmap <- function(
 #' @param column_title Title on the column. Default \code{NULL}.
 #' @param seed Random seed for reproducibility. Default \code{1}.
 #' @inheritDotParams .plotHeatmap transpose showCellLabel showCellLegend showFeatureLabel cellAnnColList featureAnnColList scale trim baseSize cellTextSize featureTextSize cellTitleSize featureTitleSize legendTextSize legendTitleSize viridisOption viridisDirection RColorBrewerOption
-#' @return A \linkS4class{HeatmapList} object.
+#' @return A \link[ComplexHeatmap]{HeatmapList-class} object.
 #' @examples
 #' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster
-#' degTest <- runPairwiseDEG(
-#'     pbmc,
-#'     groupTest = "stim",
-#'     groupCtrl = "ctrl",
-#'     variable1 = "dataset",
-#'     splitBy = "defaultCluster"
-#' )
 #' pbmc <- normalize(pbmc)
-#' plotPairwiseDEGHeatmap(pbmc, degTest, '4.stim')
+#' plotPairwiseDEGHeatmap(pbmc, deg.pw, '4.stim')
 plotPairwiseDEGHeatmap <- function(
         object,
         result,

diff --git a/R/GSEA.R b/R/GSEA.R
@@ -144,10 +144,19 @@ runGSEA <- function(
 #' up-regulated genes and should be preferred when \code{result} comes from
 #' marker detection test. When \code{result} comes from group-to-group DE test,
 #' it is recommended to set \code{splitReg = TRUE}.
-#' @param ... Additional arguments passed to \code{gprofiler2::gost()}.
+#' @param ... Additional arguments passed to \code{gprofiler2::gost()}. Useful
+#' ones are:
+#'
+#' \describe{
+#' \item{\code{organism}}{The organism to be used for the analysis. "hsapiens"
+#' for human, "mmusculus" for mouse.}
+#' \item{\code{evcodes}}{Whether to include overlapping genes for each term.
+#' Default \code{FALSE}.}
+#' \item{\code{significant}}{Whether to filter out non-significant terms.
+#' Default \code{TRUE}.}
+#' }
 #' Arguments \code{query}, \code{custom_bg}, \code{domain_scope}, and
-#' \code{ordered_query} are pre-specified by this wrapper function. Users must
-#' set \code{organism = "mmusculus"} when working on mouse data.
+#' \code{ordered_query} are pre-specified by this wrapper function.
 #' @references Kolberg, L. et al, 2020 and Raudvere, U. et al, 2019
 #' @return A list object where each element is a result list for a group. Each
 #' result list contains two elements:
@@ -157,22 +166,11 @@ runGSEA <- function(
 #' See \code{gprofiler2::gost()} for detailed explanation.
 #' @export
 #' @examples
-#' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster
-#' # Test the DEG between "stim" and "ctrl", within each cluster
-#' result <- runPairwiseDEG(
-#'     pbmc,
-#'     groupTest = "stim",
-#'     groupCtrl = "ctrl",
-#'     variable1 = "dataset",
-#'     splitBy = "defaultCluster",
-#'     nPsdRep = 3,
-#'     minCellPerRep = 3
-#' )
 #' # Setting `significant = FALSE` because it's hard for a gene list obtained
 #' # from small test dataset to represent real-life biology.
 #' \donttest{
 #' if (requireNamespace("gprofiler2", quietly = TRUE)) {
-#'     go <- runGOEnrich(result, group = "0.stim", significant = FALSE)
+#'     go <- runGOEnrich(deg.pw, group = "0.stim", significant = FALSE)
 #' }
 #' }
 runGOEnrich <- function(

diff --git a/R/classConversion.R b/R/classConversion.R
@@ -272,7 +272,10 @@ as.ligerDataset.SingleCellExperiment <- function(
 #' "counts.stim". If \code{merge = TRUE}, return a single Seurat object with
 #' layers for all datasets merged.
 #' @examples
-#' seu <- ligerToSeurat(pbmc)
+#' if (requireNamespace("SeuratObject", quietly = TRUE) &&
+#'     requireNamespace("Seurat", quietly = TRUE)) {
+#'     seu <- ligerToSeurat(pbmc)
+#' }
 ligerToSeurat <- function(
         object,
         assay = NULL,

diff --git a/R/classes.R b/R/classes.R
@@ -167,7 +167,7 @@ setValidity("ligerDataset", .valid.ligerDataset)
 #' @slot datasets list of \linkS4class{ligerDataset} objects. Use generic
 #' \code{dataset}, \code{dataset<-}, \code{datasets} or \code{datasets<-} to
 #' interact with. See detailed section accordingly.
-#' @slot cellMeta \linkS4class{DFrame} object for cell metadata. Pre-existing
+#' @slot cellMeta \link[S4Vectors]{DFrame} object for cell metadata. Pre-existing
 #' metadata, QC metrics, cluster labeling, etc. are all stored here. Use
 #' generic \code{cellMeta}, \code{cellMeta<-}, \code{$}, \code{[[]]} or
 #' \code{[[]]<-} to interact with. See detailed section accordingly.

diff --git a/R/data.R b/R/data.R
@@ -22,3 +22,69 @@
 #' @source https://www.nature.com/articles/s41587-019-0332-7
 #' @references Jeffrey M. Granja et al., Nature Biotechnology, 2019
 "bmmc"
+
+#' Data frame for example marker DEG test result
+#' @description
+#' The data frame is the direct output of marker detection DEG test applied on
+#' example dataset which can be loaded with \code{data("pbmc")}. The DEG test
+#' was done with:
+#' ```
+#' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster
+#' deg.marker <- runMarkerDEG(
+#'     pbmc,
+#'     minCellPerRep = 5
+#' )
+#' ```
+#' The result is for the marker detection test for 8 clusters in the dataset by
+#' comparing each cluster against all other clusters.
+#' @seealso [runMarkerDEG()]
+#' @format data.frame object of 1992 rows with columns:
+#' \itemize{
+#' \item feature: gene names, 249 unique genes repeated 8 times for the tests
+#' done for 8 clusters.
+#' \item group: cluster names, 8 unique cluster names, dividing the tests.
+#' \item logFC: log fold change of the gene expression between the cluster of
+#' interest against all other clusters.
+#' \item pval: p-value of the DEG test.
+#' \item padj: adjusted p-value of the DEG test.
+#' \item pct_in: percentage of cells in the cluster of interest expressing the
+#' gene.
+#' \item pct_out: percentage of cells in all other clusters expressing the gene.
+#' }
+"deg.marker"
+
+#' Data frame for example pairwise DEG test result
+#' @description
+#' The data frame is the direct output of pairwise DEG test applied on example
+#' dataset which can be loaded with \code{data("pbmc")}. The DEG test was done
+#' with:
+#' ```
+#' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster
+#' deg.pw <- runPairwiseDEG(
+#'     pbmc,
+#'     groupTest = "stim",
+#'     groupCtrl = "ctrl",
+#'     variable1 = "dataset",
+#'     splitBy = "defaultCluster"
+#' )
+#' ```
+#' The result is for the DEG test split for each cluster in the dataset, and
+#' within each cluster, compare the cells from "stim" against the cells from
+#' "ctrl".
+#' @seealso [runPairwiseDEG()]
+#' @format data.frame object of 1743 rows with columns:
+#' \itemize{
+#' \item feature: gene names, 249 unique genes repeated 7 times for the tests
+#' done for 7 clusters. (One fewer cluster than in \code{\link{deg.marker}}
+#' because the sample size of the smallest cluster is too small.)
+#' \item group: cluster names, 7 unique cluster names, dividing the tests.
+#' \item logFC: log fold change of the gene expression between the condition of
+#' interest against the control condition.
+#' \item pval: p-value of the DEG test.
+#' \item padj: adjusted p-value of the DEG test.
+#' \item pct_in: percentage of cells in the condition of interest expressing the
+#' gene.
+#' \item pct_out: percentage of cells in the control condition expressing the
+#' gene.
+#' }
+"deg.pw"
diff --git a/R/ggplotting.R b/R/ggplotting.R
@@ -616,8 +616,10 @@ plotCellViolin <- function(
 #' \code{baseSize + 2}.
 #' @param subtitleSize,xTextSize,yTextSize,legendTextSize Size of subtitle text,
 #' axis texts and legend text. Default \code{NULL} controls by \code{baseSize}.
-#' @param xFacetSize,yFacetSize Size of facet label text. Default \code{NULL}
-#' controls by \code{baseSize - 2}.
+#' @param xFacetSize Size of facet strip label text on x-axis. Default
+#' \code{NULL} controls by \code{baseSize - 2}.
+#' @param yFacetSize Size of facet strip label text on y-axis. Default
+#' \code{NULL} controls by \code{baseSize - 2}.
 #' @param legendDotSize Allow dots in legend region to be large enough to see
 #' the colors/shapes clearly. Default \code{4}.
 #' @param panelBorder Whether to show rectangle border of the panel instead of
@@ -631,7 +633,7 @@ plotCellViolin <- function(
 #' presented, otherwise ggplot hues.
 #' @param legendNRow,legendNCol Integer, when too many categories in one
 #' variable, arranges number of rows or columns. Default \code{NULL},
-#' automatically split to \code{ceiling(levels(variable)/10)} columns.
+#' automatically split to \code{ceiling(nlevels(variable)/15)} columns.
 #' @param colorPalette For continuous coloring, an index or a palette name to
 #' select from available options from ggplot
 #' \code{\link[ggplot2]{scale_brewer}} or \code{\link[viridisLite]{viridis}}.

diff --git a/R/h5Utility.R b/R/h5Utility.R
@@ -361,17 +361,18 @@ closeAllH5.ligerDataset <- function(object) {
 #' Basing on the goal of the whole workflow, the data will always be written
 #' in a CSC matrix format and colnames/rownames are always required.
 #'
-#' The default method coerces the input to a \linkS4class{dgCMatrix}. Methods
-#' for other container classes tries to extract proper data and calls the
-#' default method.
+#' The default method coerces the input to a \link[Matrix]{dgCMatrix-class}
+#' object. Methods for other container classes try to extract proper data and
+#' call the default method.
 #' @param x An object with in-memory data to be written into H5 file.
 #' @param file A character string of the file path to be written.
 #' @param overwrite Logical, whether to overwrite the file if it already exists.
 #' Default \code{FALSE}.
 #' @param indicesPath,indptrPath,dataPath The paths inside the H5 file where
-#' the \linkS4class{dgCMatrix} constructor \code{i}, \code{p}, and \code{x} will
-#' be written to, respectively. Default using cellranger convention
-#' \code{"matrix/indices"}, \code{"matrix/indptr"}, and \code{"matrix/data"}.
+#' the \link[Matrix]{dgCMatrix-class} constructor \code{i}, \code{p}, and
+#' \code{x} will be written to, respectively. Default using cellranger
+#' convention \code{"matrix/indices"}, \code{"matrix/indptr"}, and
+#' \code{"matrix/data"}.
 #' @param shapePath The path inside the H5 file where the shape of the matrix
 #' will be written to. Default \code{"matrix/shape"}.
 #' @param barcodesPath The path inside the H5 file where the barcodes/colnames

diff --git a/R/integration.R b/R/integration.R
@@ -582,8 +582,9 @@ optimizeALS <- function( # nocov start
 #' matrices with a single Seurat object. We strongly recommend that users create
 #' a \linkS4class{liger} object which has the specific structure.
 #' @param object \linkS4class{liger} object. Scaled data required.
-#' @param newDatasets Named list of \linkS4class{dgCMatrix}. New datasets for
-#' scenario 2 or scenario 3. Default \code{NULL} triggers scenario 1.
+#' @param newDatasets Named list of \link[Matrix]{dgCMatrix-class} objects. New
+#' datasets for scenario 2 or scenario 3. Default \code{NULL} triggers scenario
+#' 1.
 #' @param projection Whether to perform data integration with scenario 3 when
 #' \code{newDatasets} is specified. See description. Default \code{FALSE}.
 #' @param WInit,VInit,AInit,BInit Optional initialization for \eqn{W}, \eqn{V},

diff --git a/R/preprocess.R b/R/preprocess.R
@@ -1339,7 +1339,7 @@ selectGenesVST <- function(
 #' done by doing \code{scaleNotCenter(lig, useDataset = c("other", "datasets"))},
 #' and then \code{\link{reverseMethData}(lig, useDataset = c("meth", "datasets"))}.
 #' @param object \linkS4class{liger} object, \linkS4class{ligerDataset} object,
-#' \linkS4class{dgCMatrix}, or a Seurat object.
+#' \link[Matrix]{dgCMatrix-class} object, or a Seurat object.
 #' @param ... Arguments passed to other methods. The order goes by: "liger"
 #' method calls "ligerDataset" method, which then calls "dgCMatrix" method.
 #' "Seurat" method directly calls "dgCMatrix" method.

diff --git a/R/util.R b/R/util.R
@@ -447,7 +447,7 @@ cli_or <- function(x) cli::cli_vec(x, list("vec-last" = " or "))
             cli::cli_abort(
                 "Package {.pkg scattermore} is needed for rasterizing the scatter
                 plot. Please install it by command:
-                {.code BiocManager::install('scattermore')}"
+                {.code install.packages('scattermore')}"
             )
         }
     }
@@ -679,3 +679,27 @@ searchH <- function(object, useRaw = NULL) {
     }
     return(list(H = H, useRaw = useRaw))
 }
+
+
+# Reshape a data frame from wide to long format using base R only.
+#
+# For each column selected by `cols`, one block of `nrow(data)` rows is built
+# holding the column name (in `names_to`), the column values (in `values_to`)
+# and a copy of every non-selected column. The blocks are then stacked with
+# `rbind()` in the order given by `cols`.
+#
+# @param data A data.frame (assumed; anything supporting `colnames()`,
+#   `nrow()` and `[[` by column name is used the same way).
+# @param cols Columns to pivot: a character vector of column names, or a
+#   numeric/logical index that is resolved against `colnames(data)`.
+# @param names_to Name of the output column storing the original column
+#   names. Default "name".
+# @param values_to Name of the output column storing the values. Default
+#   "value".
+# @return A data.frame with columns `names_to`, `values_to`, followed by the
+#   non-selected columns. `NULL` when `cols` selects nothing, because
+#   `rbind()` of zero blocks is `NULL`.
+.pivot_longer <- function(
+        data,
+        cols,
+        names_to = "name",
+        values_to = "value"
+) {
+    if (is.numeric(cols) || is.logical(cols)) cols <- colnames(data)[cols]
+    if (!is.character(cols))
+        cli::cli_abort("`cols` should be a character vector.")
+    # Fail early on unknown columns (including NA names produced by
+    # out-of-range indices). Otherwise `data[[col]]` is NULL and the value
+    # column would be silently dropped from the block.
+    notFound <- cols[!cols %in% colnames(data)]
+    if (length(notFound) > 0)
+        cli::cli_abort("Column(s) not found in `data`: {.val {notFound}}")
+    keeps <- setdiff(colnames(data), cols)
+    # Kept columns are written after the name/value columns, so a name clash
+    # would silently overwrite the pivoted content.
+    clashed <- intersect(c(names_to, values_to), keeps)
+    if (length(clashed) > 0)
+        cli::cli_abort(
+            "`names_to`/`values_to` clash with existing column(s): {.val {clashed}}"
+        )
+    # Loop-invariant, so computed once rather than once per pivoted column.
+    len <- nrow(data)
+    blocks <- lapply(cols, function(col) {
+        blockData <- list()
+        blockData[[names_to]] <- rep(col, len)
+        blockData[[values_to]] <- data[[col]]
+        for (keep in keeps) {
+            blockData[[keep]] <- data[[keep]]
+        }
+        as.data.frame(blockData)
+    })
+    do.call(rbind, blocks)
+}