Merge pull request #302 from mvfki/newObj

Collective updates
welch-lab · Mar 6, 2024 · 14eb783 · 14eb783
2 parents 4e376d2 + e036aaa
commit 14eb783
Show file tree

Hide file tree

Showing 66 changed files with 3,380 additions and 3,173 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,13 +39,13 @@ VignetteBuilder: knitr
 Encoding: UTF-8
 LinkingTo: Rcpp, RcppArmadillo, RcppEigen, RcppProgress
 Depends:
-    Matrix,
     methods,
     stats,
     utils,
     R (>= 3.4)
 Imports:
     circlize,
+    cli,
     cowplot,
     ComplexHeatmap,
     dplyr,
@@ -55,6 +55,7 @@ Imports:
     leidenAlg (>= 1.1.1),
     lifecycle,
     magrittr,
+    Matrix,
     RANN,
     RColorBrewer,
     Rcpp,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method("[",liger)
 S3method("[",ligerDataset)
 S3method("[[",liger)
 S3method(.DollarNames,liger)
@@ -189,7 +190,6 @@ exportClasses(ligerRNADataset)
 exportClasses(ligerSpatialDataset)
 exportMethods("$")
 exportMethods("$<-")
-exportMethods("[")
 exportMethods("cellMeta<-")
 exportMethods("coordinate<-")
 exportMethods("dataset<-")
@@ -234,6 +234,9 @@ exportMethods(scaleUnsharedData)
 exportMethods(show)
 exportMethods(varFeatures)
 exportMethods(varUnsharedFeatures)
+importClassesFrom(Matrix,dgCMatrix)
+importClassesFrom(Matrix,dgTMatrix)
+importClassesFrom(Matrix,dgeMatrix)
 importClassesFrom(S4Vectors,DataFrame)
 importFrom(Matrix,colSums)
 importFrom(Matrix,rowSums)

diff --git a/R/ATAC.R b/R/ATAC.R
@@ -50,33 +50,28 @@ imputeKNN <- function(
         knn_k = nNeighbors
 ) {
     .deprecateArgs(list(knn_k = "nNeighbors"), defunct = "scale")
-    # if (!requireNamespace("FNN", quietly = TRUE)) {
-    #     stop("Package \"foreach\" needed for this function to work. ",
-    #          "Please install it by command:\n",
-    #          "install.packages('FNN')",
-    #          call. = FALSE)
-    # }
     if (is.null(getMatrix(object, "H.norm")))
-        stop("Aligned factor loading has to be available for imputation. ",
-             "Please run `quantileNorm()` in advance.")
-
-    if (length(reference) > 1) {
-        stop("Can only have ONE reference dataset")
-    }
-    reference <- .checkUseDatasets(object, reference)
-    if (!inherits(dataset(object, reference), "ligerATACDataset"))
-        stop("Selected reference should be ATAC dataset.")
+        cli::cli_abort(
+            "Aligned factor loading has to be available for imputation.
+            Please run {.fn quantileNorm} in advance.")
+    reference <- .checkArgLen(reference, n = 1)
+    reference <- .checkUseDatasets(object, reference)#, modal = "atac")
     queries <- .checkUseDatasets(object, queries)
     if (any(queries %in% reference)) {
-        warning("Reference dataset cannot be inclued in the query ",
-                "datasets. Removed from query list.")
+        cli::cli_alert_warning(
+            "Reference dataset cannot be inclued in the query datasets."
+        )
+        cli::cli_alert_warning(
+            "Removed from query list: {.val {queries[queries %in% reference]}}"
+        )
         queries <- queries[!queries %in% reference]
     }
     object <- recordCommand(object, ..., dependencies = c("RANN", "Matrix"))
     if (isTRUE(verbose)) {
-        .log("Imputing all the datasets exept the reference dataset\n",
-             "Reference dataset: ", reference, "\n",
-             "Query datasets: ", paste(queries, collapse = ", "))
+        cli::cli_alert_info(
+            "Imputing {length(queries)} query dataset{?s}: {.val {queries}}"
+        )
+        cli::cli_alert_info("from reference dataset: {.val {reference}}")
     }
 
     referenceCells <- colnames(dataset(object, reference))
@@ -194,35 +189,36 @@ linkGenesAndPeaks <- function(
 ) {
     ## check dependency
     if (!requireNamespace("GenomicRanges", quietly = TRUE))
-        stop("Package \"GenomicRanges\" needed for this function to work. ",
-             "Please install it by command:\n",
-             "BiocManager::install('GenomicRanges')",
-             call. = FALSE)
+        cli::cli_abort(
+            "Package {.pkg GenomicRanges} is needed for this function to work.
+            Please install it by command:
+            {.code BiocManager::install('GenomicRanges')}")
 
     if (!requireNamespace("IRanges", quietly = TRUE))
-        stop("Package \"IRanges\" needed for this function to work. ",
-             "Please install it by command:\n",
-             "BiocManager::install('IRanges')",
-             call. = FALSE)
+        cli::cli_abort(
+            "Package {.pkg IRanges} is needed for this function to work.
+            Please install it by command:
+            {.code BiocManager::install('IRanges')}")
     if (!requireNamespace("psych", quietly = TRUE))
-        stop("Package \"psych\" needed for this function to work. ",
-             "Please install it by command:\n",
-             "BiocManager::install('psych')",
-             call. = FALSE)
+        cli::cli_abort(
+            "Package {.pkg psych} is needed for this function to work.
+            Please install it by command:
+            {.code BiocManager::install('psych')}")
     .deprecateArgs(list(path_to_coords = "pathToCoords",
                         genes.list = "useGenes", dist = "method"))
     method <- match.arg(method)
-    if (length(useDataset) != 1)
-        stop("Please select only one dataset")
     useDataset <- .checkUseDatasets(object, useDataset)
+    if (length(useDataset) != 1)
+        cli::cli_abort("Please select only one dataset")
     lad <- dataset(object, useDataset)
     if (!inherits(lad, "ligerATACDataset"))
-        stop("Specified dataset is not of `ligerATACDataset` class. ",
-             "Please try `imputeKNN()` with `query = ", useDataset, "`. ")
+        cli::cli_abort(
+            "Specified dataset is not of `ligerATACDataset` class.
+            Please try {.fn imputeKNN} with `query = '{useDataset}'`.")
     if (is.null(normData(lad)))
-        stop("Normalized gene expression not found in specified dataset.")
+        cli::cli_abort("Normalized gene expression not found in specified dataset.")
     if (is.null(normPeak(lad)))
-        stop("Normalized peak counts not found in specified dataset.")
+        cli::cli_abort("Normalized peak counts not found in specified dataset.")
 
     ### make GRanges object for peaks
     peakCounts <- normPeak(lad)
@@ -257,23 +253,26 @@ linkGenesAndPeaks <- function(
     if (is.null(useGenes)) useGenes <- colnames(geneCounts)
     missingGenes <- !useGenes %in% names(genesCoords)
     if (sum(missingGenes) != 0 && isTRUE(verbose))
-        .log("Ignoring ",sum(missingGenes),
-             " genes not found in given gene coordinates")
-
+        cli::cli_alert_warning(
+            "Ignoring {sum(missingGenes)} genes not found in given gene coordinates"
+        )
     useGenes <- useGenes[!missingGenes]
-    if (length(useGenes) == 0)
-        stop("Number of genes to be tested equals 0. Please check input ",
-             "`useGenes` or the coordinate file.")
-    else {
-        .log(length(useGenes), " genes to be tested against ", ncol(peakCounts),
-             " peaks")
+    if (length(useGenes) == 0) {
+        cli::cli_abort(
+            "Number of genes to be tested equals 0. Please check input
+            {.code useGenes} or the coordinate file."
+        )
+    } else {
+        cli::cli_alert_info(
+            "{length(useGenes)} genes to be tested against {ncol(peakCounts)} peaks"
+        )
     }
     genesCoords <- genesCoords[useGenes]
 
     ### construct regnet
     if (isTRUE(verbose)) {
-        .log("Calculating correlation for gene-peak pairs...")
-        pb <- utils::txtProgressBar(0, length(useGenes), style = 3)
+        cli::cli_alert_info("Calculating correlation for gene-peak pairs...")
+        cli::cli_progress_bar("", total = length(useGenes), type = "iter")
     }
 
     # Result would be a sparse matrix, initialize the `i`, `p`, `x` vectors.
@@ -320,9 +319,10 @@ linkGenesAndPeaks <- function(
         ind <- c(ind, as.numeric(peaks.use))
         indp <- c(indp, as.numeric(eachLen))
         values <- c(values, res.corr)
-        if (isTRUE(verbose)) utils::setTxtProgressBar(pb, pos)
+        if (isTRUE(verbose)) {
+            cli::cli_progress_update(set = pos)
+        }
     }
-    if (isTRUE(verbose)) cat("\n")
     # make final sparse matrix
     regnet <-  Matrix::sparseMatrix(
         i = ind, p = c(0, indp), x = values,
@@ -379,20 +379,22 @@ exportInteractTrack <- function(
     if (is.null(useGenes)) {
         useGenes <- colnames(corrMat)
     } else if (any(!useGenes %in% colnames(corrMat))) {
-        .log("Removed ", sum(!useGenes %in% colnames(corrMat)), " genes not ",
-             "found in `corrMat`")
+        cli::cli_alert_warning(
+            "Removed {sum(!useGenes %in% colnames(corrMat))} genes not found in {.code corrMat}"
+        )
         useGenes <- useGenes[useGenes %in% colnames(corrMat)]
     }
     # Filter useGenes by significance
     geneSel <- Matrix::colSums(corrMat[, useGenes, drop = FALSE] != 0) > 0
     if (length(useGenes) - sum(geneSel) > 0)
-        .log("Totally ", length(useGenes) - sum(geneSel), " selected genes do ",
-             "not have significant correlated peaks, out of ", length(useGenes),
-             " selected genes")
+        cli::cli_alert_warning(
+            "Totally {length(useGenes) - sum(geneSel)} selected genes do not have significant correlated peaks, out of {length(useGenes)} selected genes",
+            wrap = TRUE
+        )
     useGenes <- useGenes[geneSel]
     if (length(useGenes) == 0) {
-        stop("No gene requested is either available or ",
-             "having significant correlated peaks. ")
+        cli::cli_abort("No gene requested is either available or having
+                       significant correlated peaks. ")
     }
 
     ### make GRanges object for genes
@@ -414,8 +416,6 @@ exportInteractTrack <- function(
     if (!file.exists(outputPath)) file.create(outputPath)
     outputPath <- normalizePath(outputPath)
 
-    .log("Writing result to: ", outputPath)
-
     # Start writing BED file
     trackDoc <- paste0('track type=interact name="Interaction Track" ',
                        'description="Gene-Peaks Links" ',
@@ -459,6 +459,8 @@ exportInteractTrack <- function(
             fileEncoding = ""
         )
     }
+    cli::cli_alert_success("Result written at: {.file {outputPath}}")
+    invisible(NULL)
 }
 
 #' [Deprecated] Export predicted gene-pair interaction

diff --git a/R/DEG_marker.R b/R/DEG_marker.R
@@ -96,7 +96,7 @@ runPairwiseDEG <- function(
         groups <- list(group1Idx, group2Idx)
         names(groups) <- c(group1Name, group2Name)
     } else {
-        stop("Please see `?runPairwiseDEG` for usage.")
+        cli::cli_abort("Please see {.code ?runPairwiseDEG} for usage.")
     }
     result <- .runDEG(object, groups = groups, method = method,
                       usePeak = usePeak, useReplicate = useReplicate,
@@ -161,7 +161,7 @@ runMarkerDEG <- function(
     allCellIdx <- seq(ncol(object))[object$dataset %in% useDatasets]
     conditionBy <- conditionBy %||% object@uns$defaultCluster
     if (is.null(conditionBy)) {
-        stop("No `conditionBy` given or default cluster not set.")
+        cli::cli_abort("No {.var conditionBy} given or default cluster not set.")
     }
     conditionBy <- .fetchCellMetaVar(
         object, conditionBy, cellIdx = allCellIdx,
@@ -241,6 +241,8 @@ runWilcoxon <- function(
 ) {
     method <- match.arg(method)
     allCellIdx <- unlist(groups)
+    if (length(allCellIdx) == 0)
+        cli::cli_abort(c(x = "No cell selected"))
     allCellBC <- colnames(object)[allCellIdx]
     datasetInvolve <- levels(object$dataset[allCellIdx, drop = TRUE])
     var <- factor(rep(names(groups), lengths(groups)), levels = names(groups))
@@ -258,8 +260,10 @@ runWilcoxon <- function(
     mat <- Reduce(cbind, dataList)
     mat <- mat[, allCellBC, drop = FALSE]
     if (method == "wilcoxon") {
+        cliID <- cli::cli_process_start("Running Wilcoxon rank-sum test")
         mat <- log1p(1e10*mat)
         result <- wilcoxauc(mat, var)
+        cli::cli_process_done(id = cliID)
     } else if (method == "pseudoBulk") {
         if (is.null(useReplicate)) {
             replicateAnn <- setupPseudoRep(var, nRep = nPsdRep,
@@ -292,27 +296,31 @@ runWilcoxon <- function(
 
 .DE.checkDataAvail <- function(object, useDatasets, method, usePeak) {
     if (isH5Liger(object, useDatasets)) { # nocov start
-        stop("HDF5 based datasets detected but is not supported. \n",
-             "Try `object.sub <- downsample(object, useSlot = ",
-             "'normData')` to create ANOTHER object with in memory data.")
+        cli::cli_abort(
+            c("HDF5 based datasets detected but is not supported. ",
+              "i" = "Try {.code object.sub <- downsample(object, useSlot = 'normData')} to create another object with in memory data")
+        )
     } # nocov end
     if (method == "wilcoxon") {
         slot <- ifelse(usePeak, "normPeak", "normData")
     } else if (method == "pseudoBulk") {
         if (!requireNamespace("DESeq2", quietly = TRUE)) # nocov start
-            stop("Package \"DESeq2\" needed for this function to work. ",
-                 "Please install it by command:\n",
-                 "BiocManager::install('DESeq2')",
-                 call. = FALSE) # nocov end
+            cli::cli_abort(
+                "Package {.pkg DESeq2} is needed for this function to work.
+                Please install it by command:
+                {.code BiocManager::install('DESeq2')}"
+            ) # nocov end
         slot <- ifelse(usePeak, "rawPeak", "rawData")
     }
     allAvail <- all(sapply(useDatasets, function(d) {
         ld <- dataset(object, d)
         !is.null(methods::slot(ld, slot))
     }))
     if (!allAvail)
-        stop(slot, " not all available for involved datasets. [method = \"",
-             method, "\", usePeak = ", usePeak, "]")
+        cli::cli_abort(
+            c("{.field {slot}} not all available for involved datasets: {.val {useDatasets}}",
+              "i" = "{.code method = '{method}'}; {.code usePeak = {usePeak}}")
+        )
     return(slot)
 }
 
@@ -345,9 +353,10 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
         subrep <- replicateAnn[replicateAnn$groups == gr,]
         splitLabel <- interaction(subrep, drop = TRUE)
         if (nlevels(splitLabel) < 2) {
-            stop("Too few replicates label for condition \"", gr, "\". ",
-                 "Cannot not create pseudo-bulks. Please use ",
-                 "consider creating pseudo-replicates or use wilcoxon instead.")
+            cli::cli_abort(
+                c("Too few replicates for condition {.val {gr}}. Cannot create pseudo-bulks.",
+                  "i" = "Please consider creating pseudo-replicates or using {.code method = 'wilcoxon'} instead.")
+            )
         }
     }
     splitLabel <- interaction(replicateAnn, drop = TRUE)
@@ -360,10 +369,9 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
     mat <- mat[, idx, drop = FALSE]
     replicateAnn <- replicateAnn[idx, , drop = FALSE]
     if (verbose) {
-        .log("Ignoring replicates with too few cells: ",
-             paste0(ignored, collapse = ", "))
-        .log("Replicate size:")
-        .log(paste0(levels(splitLabel), ": ", table(splitLabel), collapse = ", "), level = 2)
+        if (length(ignored) > 0) cli::cli_alert_warning("Ignoring replicates with too few cells: {.val {ignored}}")
+        cli::cli_alert_info("Replicate sizes:")
+        print(table(splitLabel))
     }
     pseudoBulks <- colAggregateSums_sparse(mat, as.integer(splitLabel) - 1,
                                            nlevels(splitLabel))
@@ -375,7 +383,7 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
 .callDESeq2 <- function(pseudoBulks, groups,
                          verbose = getOption("ligerVerbose")) {
     # DESeq2 workflow
-    if (isTRUE(verbose)) .log("Calling DESeq2 Wald test")
+    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Calling DESeq2 Wald test")
     ## NOTE: DESeq2 wishes that the contrast/control group is the first level
     ## whereas we required it as the second in upstream input. So we need to
     ## reverse it here.
@@ -396,6 +404,7 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
     res$group <- levels(groups)[2]
     res <- res[, c(7, 8, 2, 5, 6)]
     colnames(res) <- c("feature", "group", "logFC", "pval", "padj")
+    if (isTRUE(verbose)) cli::cli_process_done(id = cliID)
     return(res)
 }