Skip to content

Commit

Permalink
Merge pull request #302 from mvfki/newObj
Browse files Browse the repository at this point in the history
Collective updates
  • Loading branch information
mvfki authored Mar 6, 2024
2 parents 4e376d2 + e036aaa commit 14eb783
Show file tree
Hide file tree
Showing 66 changed files with 3,380 additions and 3,173 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ VignetteBuilder: knitr
Encoding: UTF-8
LinkingTo: Rcpp, RcppArmadillo, RcppEigen, RcppProgress
Depends:
Matrix,
methods,
stats,
utils,
R (>= 3.4)
Imports:
circlize,
cli,
cowplot,
ComplexHeatmap,
dplyr,
Expand All @@ -55,6 +55,7 @@ Imports:
leidenAlg (>= 1.1.1),
lifecycle,
magrittr,
Matrix,
RANN,
RColorBrewer,
Rcpp,
Expand Down
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method("[",liger)
S3method("[",ligerDataset)
S3method("[[",liger)
S3method(.DollarNames,liger)
Expand Down Expand Up @@ -189,7 +190,6 @@ exportClasses(ligerRNADataset)
exportClasses(ligerSpatialDataset)
exportMethods("$")
exportMethods("$<-")
exportMethods("[")
exportMethods("cellMeta<-")
exportMethods("coordinate<-")
exportMethods("dataset<-")
Expand Down Expand Up @@ -234,6 +234,9 @@ exportMethods(scaleUnsharedData)
exportMethods(show)
exportMethods(varFeatures)
exportMethods(varUnsharedFeatures)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgTMatrix)
importClassesFrom(Matrix,dgeMatrix)
importClassesFrom(S4Vectors,DataFrame)
importFrom(Matrix,colSums)
importFrom(Matrix,rowSums)
Expand Down
122 changes: 62 additions & 60 deletions R/ATAC.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,33 +50,28 @@ imputeKNN <- function(
knn_k = nNeighbors
) {
.deprecateArgs(list(knn_k = "nNeighbors"), defunct = "scale")
# if (!requireNamespace("FNN", quietly = TRUE)) {
# stop("Package \"foreach\" needed for this function to work. ",
# "Please install it by command:\n",
# "install.packages('FNN')",
# call. = FALSE)
# }
if (is.null(getMatrix(object, "H.norm")))
stop("Aligned factor loading has to be available for imputation. ",
"Please run `quantileNorm()` in advance.")

if (length(reference) > 1) {
stop("Can only have ONE reference dataset")
}
reference <- .checkUseDatasets(object, reference)
if (!inherits(dataset(object, reference), "ligerATACDataset"))
stop("Selected reference should be ATAC dataset.")
cli::cli_abort(
"Aligned factor loading has to be available for imputation.
Please run {.fn quantileNorm} in advance.")
reference <- .checkArgLen(reference, n = 1)
reference <- .checkUseDatasets(object, reference)#, modal = "atac")
queries <- .checkUseDatasets(object, queries)
if (any(queries %in% reference)) {
warning("Reference dataset cannot be inclued in the query ",
"datasets. Removed from query list.")
cli::cli_alert_warning(
"Reference dataset cannot be inclued in the query datasets."
)
cli::cli_alert_warning(
"Removed from query list: {.val {queries[queries %in% reference]}}"
)
queries <- queries[!queries %in% reference]
}
object <- recordCommand(object, ..., dependencies = c("RANN", "Matrix"))
if (isTRUE(verbose)) {
.log("Imputing all the datasets exept the reference dataset\n",
"Reference dataset: ", reference, "\n",
"Query datasets: ", paste(queries, collapse = ", "))
cli::cli_alert_info(
"Imputing {length(queries)} query dataset{?s}: {.val {queries}}"
)
cli::cli_alert_info("from reference dataset: {.val {reference}}")
}

referenceCells <- colnames(dataset(object, reference))
Expand Down Expand Up @@ -194,35 +189,36 @@ linkGenesAndPeaks <- function(
) {
## check dependency
if (!requireNamespace("GenomicRanges", quietly = TRUE))
stop("Package \"GenomicRanges\" needed for this function to work. ",
"Please install it by command:\n",
"BiocManager::install('GenomicRanges')",
call. = FALSE)
cli::cli_abort(
"Package {.pkg GenomicRanges} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('GenomicRanges')}")

if (!requireNamespace("IRanges", quietly = TRUE))
stop("Package \"IRanges\" needed for this function to work. ",
"Please install it by command:\n",
"BiocManager::install('IRanges')",
call. = FALSE)
cli::cli_abort(
"Package {.pkg IRanges} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('IRanges')}")
if (!requireNamespace("psych", quietly = TRUE))
stop("Package \"psych\" needed for this function to work. ",
"Please install it by command:\n",
"BiocManager::install('psych')",
call. = FALSE)
cli::cli_abort(
"Package {.pkg psych} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('psych')}")
.deprecateArgs(list(path_to_coords = "pathToCoords",
genes.list = "useGenes", dist = "method"))
method <- match.arg(method)
if (length(useDataset) != 1)
stop("Please select only one dataset")
useDataset <- .checkUseDatasets(object, useDataset)
if (length(useDataset) != 1)
cli::cli_abort("Please select only one dataset")
lad <- dataset(object, useDataset)
if (!inherits(lad, "ligerATACDataset"))
stop("Specified dataset is not of `ligerATACDataset` class. ",
"Please try `imputeKNN()` with `query = ", useDataset, "`. ")
cli::cli_abort(
"Specified dataset is not of `ligerATACDataset` class.
Please try {.fn imputeKNN} with `query = '{useDataset}'`.")
if (is.null(normData(lad)))
stop("Normalized gene expression not found in specified dataset.")
cli::cli_abort("Normalized gene expression not found in specified dataset.")
if (is.null(normPeak(lad)))
stop("Normalized peak counts not found in specified dataset.")
cli::cli_abort("Normalized peak counts not found in specified dataset.")

### make GRanges object for peaks
peakCounts <- normPeak(lad)
Expand Down Expand Up @@ -257,23 +253,26 @@ linkGenesAndPeaks <- function(
if (is.null(useGenes)) useGenes <- colnames(geneCounts)
missingGenes <- !useGenes %in% names(genesCoords)
if (sum(missingGenes) != 0 && isTRUE(verbose))
.log("Ignoring ",sum(missingGenes),
" genes not found in given gene coordinates")

cli::cli_alert_warning(
"Ignoring {sum(missingGenes)} genes not found in given gene coordinates"
)
useGenes <- useGenes[!missingGenes]
if (length(useGenes) == 0)
stop("Number of genes to be tested equals 0. Please check input ",
"`useGenes` or the coordinate file.")
else {
.log(length(useGenes), " genes to be tested against ", ncol(peakCounts),
" peaks")
if (length(useGenes) == 0) {
cli::cli_abort(
"Number of genes to be tested equals 0. Please check input
{.code useGenes} or the coordinate file."
)
} else {
cli::cli_alert_info(
"{length(useGenes)} genes to be tested against {ncol(peakCounts)} peaks"
)
}
genesCoords <- genesCoords[useGenes]

### construct regnet
if (isTRUE(verbose)) {
.log("Calculating correlation for gene-peak pairs...")
pb <- utils::txtProgressBar(0, length(useGenes), style = 3)
cli::cli_alert_info("Calculating correlation for gene-peak pairs...")
cli::cli_progress_bar("", total = length(useGenes), type = "iter")
}

# Result would be a sparse matrix, initialize the `i`, `p`, `x` vectors.
Expand Down Expand Up @@ -320,9 +319,10 @@ linkGenesAndPeaks <- function(
ind <- c(ind, as.numeric(peaks.use))
indp <- c(indp, as.numeric(eachLen))
values <- c(values, res.corr)
if (isTRUE(verbose)) utils::setTxtProgressBar(pb, pos)
if (isTRUE(verbose)) {
cli::cli_progress_update(set = pos)
}
}
if (isTRUE(verbose)) cat("\n")
# make final sparse matrix
regnet <- Matrix::sparseMatrix(
i = ind, p = c(0, indp), x = values,
Expand Down Expand Up @@ -379,20 +379,22 @@ exportInteractTrack <- function(
if (is.null(useGenes)) {
useGenes <- colnames(corrMat)
} else if (any(!useGenes %in% colnames(corrMat))) {
.log("Removed ", sum(!useGenes %in% colnames(corrMat)), " genes not ",
"found in `corrMat`")
cli::cli_alert_warning(
"Removed {sum(!useGenes %in% colnames(corrMat))} genes not found in {.code corrMat}"
)
useGenes <- useGenes[useGenes %in% colnames(corrMat)]
}
# Filter useGenes by significance
geneSel <- Matrix::colSums(corrMat[, useGenes, drop = FALSE] != 0) > 0
if (length(useGenes) - sum(geneSel) > 0)
.log("Totally ", length(useGenes) - sum(geneSel), " selected genes do ",
"not have significant correlated peaks, out of ", length(useGenes),
" selected genes")
cli::cli_alert_warning(
"Totally {length(useGenes) - sum(geneSel)} selected genes do not have significant correlated peaks, out of {length(useGenes)} selected genes",
wrap = TRUE
)
useGenes <- useGenes[geneSel]
if (length(useGenes) == 0) {
stop("No gene requested is either available or ",
"having significant correlated peaks. ")
cli::cli_abort("No gene requested is either available or having
significant correlated peaks. ")
}

### make GRanges object for genes
Expand All @@ -414,8 +416,6 @@ exportInteractTrack <- function(
if (!file.exists(outputPath)) file.create(outputPath)
outputPath <- normalizePath(outputPath)

.log("Writing result to: ", outputPath)

# Start writing BED file
trackDoc <- paste0('track type=interact name="Interaction Track" ',
'description="Gene-Peaks Links" ',
Expand Down Expand Up @@ -459,6 +459,8 @@ exportInteractTrack <- function(
fileEncoding = ""
)
}
cli::cli_alert_success("Result written at: {.file {outputPath}}")
invisible(NULL)
}

#' [Deprecated] Export predicted gene-pair interaction
Expand Down
47 changes: 28 additions & 19 deletions R/DEG_marker.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ runPairwiseDEG <- function(
groups <- list(group1Idx, group2Idx)
names(groups) <- c(group1Name, group2Name)
} else {
stop("Please see `?runPairwiseDEG` for usage.")
cli::cli_abort("Please see {.code ?runPairwiseDEG} for usage.")
}
result <- .runDEG(object, groups = groups, method = method,
usePeak = usePeak, useReplicate = useReplicate,
Expand Down Expand Up @@ -161,7 +161,7 @@ runMarkerDEG <- function(
allCellIdx <- seq(ncol(object))[object$dataset %in% useDatasets]
conditionBy <- conditionBy %||% object@uns$defaultCluster
if (is.null(conditionBy)) {
stop("No `conditionBy` given or default cluster not set.")
cli::cli_abort("No {.var conditionBy} given or default cluster not set.")
}
conditionBy <- .fetchCellMetaVar(
object, conditionBy, cellIdx = allCellIdx,
Expand Down Expand Up @@ -241,6 +241,8 @@ runWilcoxon <- function(
) {
method <- match.arg(method)
allCellIdx <- unlist(groups)
if (length(allCellIdx) == 0)
cli::cli_abort(c(x = "No cell selected"))
allCellBC <- colnames(object)[allCellIdx]
datasetInvolve <- levels(object$dataset[allCellIdx, drop = TRUE])
var <- factor(rep(names(groups), lengths(groups)), levels = names(groups))
Expand All @@ -258,8 +260,10 @@ runWilcoxon <- function(
mat <- Reduce(cbind, dataList)
mat <- mat[, allCellBC, drop = FALSE]
if (method == "wilcoxon") {
cliID <- cli::cli_process_start("Running Wilcoxon rank-sum test")
mat <- log1p(1e10*mat)
result <- wilcoxauc(mat, var)
cli::cli_process_done(id = cliID)
} else if (method == "pseudoBulk") {
if (is.null(useReplicate)) {
replicateAnn <- setupPseudoRep(var, nRep = nPsdRep,
Expand Down Expand Up @@ -292,27 +296,31 @@ runWilcoxon <- function(

.DE.checkDataAvail <- function(object, useDatasets, method, usePeak) {
if (isH5Liger(object, useDatasets)) { # nocov start
stop("HDF5 based datasets detected but is not supported. \n",
"Try `object.sub <- downsample(object, useSlot = ",
"'normData')` to create ANOTHER object with in memory data.")
cli::cli_abort(
c("HDF5 based datasets detected but is not supported. ",
"i" = "Try {.code object.sub <- downsample(object, useSlot = 'normData')} to create another object with in memory data")
)
} # nocov end
if (method == "wilcoxon") {
slot <- ifelse(usePeak, "normPeak", "normData")
} else if (method == "pseudoBulk") {
if (!requireNamespace("DESeq2", quietly = TRUE)) # nocov start
stop("Package \"DESeq2\" needed for this function to work. ",
"Please install it by command:\n",
"BiocManager::install('DESeq2')",
call. = FALSE) # nocov end
cli::cli_abort(
"Package {.pkg DESeq2} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('DESeq2')}"
) # nocov end
slot <- ifelse(usePeak, "rawPeak", "rawData")
}
allAvail <- all(sapply(useDatasets, function(d) {
ld <- dataset(object, d)
!is.null(methods::slot(ld, slot))
}))
if (!allAvail)
stop(slot, " not all available for involved datasets. [method = \"",
method, "\", usePeak = ", usePeak, "]")
cli::cli_abort(
c("{.field {slot}} not all available for involved datasets: {.val {useDatasets}}",
"i" = "{.code method = '{method}'}; {.code usePeak = {usePeak}}")
)
return(slot)
}

Expand Down Expand Up @@ -345,9 +353,10 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
subrep <- replicateAnn[replicateAnn$groups == gr,]
splitLabel <- interaction(subrep, drop = TRUE)
if (nlevels(splitLabel) < 2) {
stop("Too few replicates label for condition \"", gr, "\". ",
"Cannot not create pseudo-bulks. Please use ",
"consider creating pseudo-replicates or use wilcoxon instead.")
cli::cli_abort(
c("Too few replicates for condition {.val {gr}}. Cannot create pseudo-bulks.",
"i" = "Please consider creating pseudo-replicates or using {.code method = 'wilcoxon'} instead.")
)
}
}
splitLabel <- interaction(replicateAnn, drop = TRUE)
Expand All @@ -360,10 +369,9 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
mat <- mat[, idx, drop = FALSE]
replicateAnn <- replicateAnn[idx, , drop = FALSE]
if (verbose) {
.log("Ignoring replicates with too few cells: ",
paste0(ignored, collapse = ", "))
.log("Replicate size:")
.log(paste0(levels(splitLabel), ": ", table(splitLabel), collapse = ", "), level = 2)
if (length(ignored) > 0) cli::cli_alert_warning("Ignoring replicates with too few cells: {.val {ignored}}")
cli::cli_alert_info("Replicate sizes:")
print(table(splitLabel))
}
pseudoBulks <- colAggregateSums_sparse(mat, as.integer(splitLabel) - 1,
nlevels(splitLabel))
Expand All @@ -375,7 +383,7 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
.callDESeq2 <- function(pseudoBulks, groups,
verbose = getOption("ligerVerbose")) {
# DESeq2 workflow
if (isTRUE(verbose)) .log("Calling DESeq2 Wald test")
if (isTRUE(verbose)) cliID <- cli::cli_process_start("Calling DESeq2 Wald test")
## NOTE: DESeq2 wishes that the contrast/control group is the first level
## whereas we required it as the second in upstream input. So we need to
## reverse it here.
Expand All @@ -396,6 +404,7 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
res$group <- levels(groups)[2]
res <- res[, c(7, 8, 2, 5, 6)]
colnames(res) <- c("feature", "group", "logFC", "pval", "padj")
if (isTRUE(verbose)) cli::cli_process_done(id = cliID)
return(res)
}

Expand Down
Loading

0 comments on commit 14eb783

Please sign in to comment.