From 2056c23a6e505f269eb954d57e70251519bd1abb Mon Sep 17 00:00:00 2001 From: Joshua Sodicoff Date: Mon, 29 Jul 2019 12:17:14 -0400 Subject: [PATCH 01/10] added wilcoxon rank sum test and function to make jaccard matrix --- R/liger.R | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 1 deletion(-) diff --git a/R/liger.R b/R/liger.R index 9095b910..7fb4fff8 100644 --- a/R/liger.R +++ b/R/liger.R @@ -20,12 +20,14 @@ #' matrix) #' @slot W Shared gene loading factors (k by genes) #' @slot V Dataset-specific gene loading factors (one matrix per dataset, dimensions k by genes) +#' @slot jaccard Holds Jaccard matrices for use in Louvain-Jaccard clustering and imputation #' @slot tsne.coords Matrix of 2D coordinates obtained from running t-SNE on H.norm or H matrices #' @slot alignment.clusters Initial joint cluster assignments from shared factor alignment #' @slot clusters Joint cluster assignments for cells #' @slot snf List of values associated with shared nearest factor matrix for use in clustering and #' alignment (out.summary contains edge weight information between cell combinations) #' @slot agg.data Data aggregated within clusters +#' @slot wilcoxon Data frame of Wilcoxon test output #' @slot parameters List of parameters used throughout analysis #' @slot version Version of package used to create object #' @@ -48,10 +50,12 @@ liger <- methods::setClass( H.norm = "matrix", W = "matrix", V = "list", + jaccard = "list", tsne.coords = "matrix", alignment.clusters = 'factor', clusters= "factor", agg.data = "list", + wilcoxon = "data.frame", parameters = "list", snf = 'list', version = 'ANY' @@ -3193,7 +3197,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' ligerex <- runTSNE(ligerex) #' # plot expression for CD4 and return plots #' gene_plots <- plotGene(ligerex, "CD4", return.plots = T) -#' } + plotGene <- function(object, gene, use.raw = F, methylation.indices = NULL, pt.size = 0.1, min.clip = 0, max.clip = 1, points.only = F, option = 'plasma', @@ -4246,3 +4250,183 @@ convertOldLiger = function(object, override.raw = F) { print(paste0('New slots not filled: ', setdiff(slots_new[slots_new != "cell.data"], slots))) return(new.liger) } + +#' Make Jaccard matrix +#' +#' @param object \code{liger} object. Should run scaleNotCenter first +#' +#' @return Updated \code{liger} object. +#' @export +#' @examples +#' \dontrun{ +#' # liger object, after running scaleNotCenter +#' ligerex +#' # add Jaccard matrix to a list for each dataset +#' ligerex <- runJaccard(ligerex) +#' } +runJaccard <- function(object){ + n = 1 + for(i in object@scale.data){ + i[i!=0] <- 1 + common_values <- i %*% t(i) + nonzero_value_index <- which(common_values > 0, arr.ind=TRUE) + nonzero_value <- common_values[nonzero_value_index] + row_sums <- rowSums(t(i)) + jaccard <- nonzero_value/(row_sums[nonzero_value_index[,1]] + +row_sums[nonzero_value_index[,1]] - nonzero_value) + object@jaccard[[n]] = sparseMatrix(i = nonzero_value_index[,1], + j = nonzero_value_index[,2], + x = jaccard, dims = dim(common_values)) + for(j in 1:nrow(object@jaccard[[n]])){ + object@jaccard[[n]][j,j] = 0 + } + object@jaccard[[n]][is.na(object@jaccard[[n]])] <- 0 + n = n + 1 + } + return(object) +} + +#' Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. +#' +#' @param object \code{liger} object. Should run quantileAlignSNF first +#' @param compare.datasets Split clusters by dataset of origin to compare differences in expression +#' (default TRUE) +#' @param clusters A list of clusters to include for comparison (default NULL). +#' +#' @return Updated \code{liger} object. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, clustering complete +#' ligerex +#' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +#' ligerex <- calculateWilcoxon(ligerex) +#' } +#' +calculateWilcoxon <- function(object, compare.datasets = TRUE, clusters = NULL){ + if (!require("presto", quietly = TRUE)) { + stop("Package \"presto\" needed for this function to perform fast Wilcoxon rank sum test. Please install it.", + call. = FALSE + ) + } + + cluster_labels <- as.vector(object@clusters) + names(cluster_labels) <- rownames(as.data.frame(object@clusters)) + if(compare.datasets == TRUE){ + for(i in 1:length(object@scale.data)){ + cluster_labels[rownames(object@scale.data[[i]])] <- + toupper(paste0(cluster_labels[rownames(object@scale.data[[i]])],"_",unlist(attributes(ligex@scale.data)[1])[i])) + } + } + cluster_labels = factor(cluster_labels) + + #if clusters null, do for all, if cluster does not exist throw error + if(is.null(clusters)){ + clusters = levels(cluster_labels) + } else if(is.numeric(clusters) && compare.datasets){ + combinations <- expand.grid(clusters, unlist(attributes(ligex@scale.data)[1])) + for(i in 1:nrow(combinations)){ + clusters[i] = toupper(paste0(combinations[i,1],"_",combinations[i,2])) + } + } else if(length(clusters)==1){ + stop("Wilcoxon Rank Sum Test cannot be completed for one cluster. Please include 2 or more clusters.", + call = FALSE) + } else if(length(union(cluster_labels, clusters)) > length(unique(cluster_labels))){ + stop("Selected clusters do not match clusters in data. Please limit your list to clusters in the data.", + call = FALSE) + } + expression_mat = t(object@scale.data[[1]]) + for (i in 2:length(object@scale.data)){ + expression_mat <- cbind(expression_mat, t(object@scale.data[[i]])) + } + cluster_labels <- cluster_labels[cluster_labels %in% clusters] + expression_mat <- expression_mat[,names(cluster_labels)] + + object@wilcoxon <- wilcoxauc(expression_mat, cluster_labels) + + return(object) +} + +#' Subset a dataframe produced by calculateWilcox by feature, dataset, or column +#' +#' @param object \code{liger} object. Should run calculateWilcox first +#' @param features A list of features to include in the subset (default NULL) +#' @param datasets A list of datasets to include in the subset (defualt NULL). +#' @param columns A list of columns to include in the subset (default NULL). +#' @param return.subset Return the subsetted dataframe instead of a liger object (default FALSE) +#' @param save.subset Replace the unsubset dataframe with the subset (default TRUE) +#' +#' @return Updated \code{liger} object. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, after running CalculateWilcoxon +#' ligerex +#' # create lists of features, datasets, and columns to subset by +#' genes = c("UBBP4","KIAA1715","TMEM50B") +#' datasets = c("tenx") +#' # +#' ligerex <- subsetWilcoxon(ligerex, features = genes, datasets = datasets) +#' } +subsetWilcoxon <- function(object, features = NULL, datasets = NULL, columns = NULL, + return.subset = FALSE, save.subset = TRUE){ + if(!return.subset && !save.subset){ + return(object) + } + if(!is.null(columns)){ + columns = c("feature", "group", columns) + wilcoxon = object@wilcoxon[columns] + } + if(!is.null(features)){ + wilcoxon = object@wilcoxon[object@wilcoxon$feature == toupper(features)] + } + if(!is.null(datasets)){ + for(name in datasets){ + wilcoxon = object@wilcoxon[grepl(toupper(name), object@wilcoxon$group, fixed = TRUE)] + } + } + if(save.subset){ + object@wilcoxon = wilcoxon + } + if(return.subset){ + return(wilcoxon) + } + return(object) +} + +#' Make Jaccard matrix (will have use when imputation is implemented) +#' +#' @param object \code{liger} object. Should run scaleNotCenter first +#' +#' @return Updated \code{liger} object. +#' @export +#' @examples +#' \dontrun{ +#' # liger object, after running scaleNotCenter +#' ligerex +#' # add Jaccard matrix to a list for each dataset +#' ligerex <- runJaccard(ligerex) +#' } +runJaccard <- function(object){ + n = 1 + for(i in object@scale.data){ + i[i!=0] <- 1 + common_values <- i %*% t(i) + nonzero_value_index <- which(common_values > 0, arr.ind=TRUE) + nonzero_value <- common_values[nonzero_value_index] + row_sums <- rowSums(t(i)) + jaccard <- nonzero_value/(row_sums[nonzero_value_index[,1]] + +row_sums[nonzero_value_index[,1]] - nonzero_value) + object@jaccard[[n]] = sparseMatrix(i = nonzero_value_index[,1], + j = nonzero_value_index[,2], + x = jaccard, dims = dim(common_values)) + for(j in 1:nrow(object@jaccard[[n]])){ + object@jaccard[[n]][j,j] = 0 + } + object@jaccard[[n]][is.na(object@jaccard[[n]])] <- 0 + n = n + 1 + } + return(object) +} From 4d6411d561550ca549a0d6c4b4b7af3e089f5271 Mon Sep 17 00:00:00 2001 From: Joshua Sodicoff <44811472+jsodicoff@users.noreply.github.com> Date: Mon, 29 Jul 2019 12:37:34 -0400 Subject: [PATCH 02/10] Added missing bracket from example. --- R/liger.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/liger.R b/R/liger.R index 7fb4fff8..26cc58c0 100644 --- a/R/liger.R +++ b/R/liger.R @@ -3197,6 +3197,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' ligerex <- runTSNE(ligerex) #' # plot expression for CD4 and return plots #' gene_plots <- plotGene(ligerex, "CD4", return.plots = T) +#' } plotGene <- function(object, gene, use.raw = F, methylation.indices = NULL, pt.size = 0.1, From 948b628d795bceaf84f78ad4f5b6447a2f8928b7 Mon Sep 17 00:00:00 2001 From: Joshua Sodicoff <44811472+jsodicoff@users.noreply.github.com> Date: Fri, 9 Aug 2019 10:46:06 -0400 Subject: [PATCH 03/10] fixed incorrect variable names in calculateWilcoxon --- R/liger.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/R/liger.R b/R/liger.R index 26cc58c0..95971f2f 100644 --- a/R/liger.R +++ b/R/liger.R @@ -4287,6 +4287,24 @@ runJaccard <- function(object){ return(object) } +#' Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. +#' +#' @param object \code{liger} object. Should run quantileAlignSNF first +#' @param compare.datasets Split clusters by dataset of origin to compare differences in expression +#' (default TRUE) +#' @param clusters A list of clusters to include for comparison (default NULL). +#' +#' @return Updated \code{liger} object. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, clustering complete +#' ligerex +#' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +#' ligerex <- calculateWilcoxon(ligerex) +#' } +#' #' Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. #' #' @param object \code{liger} object. Should run quantileAlignSNF first @@ -4317,7 +4335,7 @@ calculateWilcoxon <- function(object, compare.datasets = TRUE, clusters = NULL){ if(compare.datasets == TRUE){ for(i in 1:length(object@scale.data)){ cluster_labels[rownames(object@scale.data[[i]])] <- - toupper(paste0(cluster_labels[rownames(object@scale.data[[i]])],"_",unlist(attributes(ligex@scale.data)[1])[i])) + toupper(paste0(cluster_labels[rownames(object@scale.data[[i]])],"_",unlist(attributes(object@scale.data)[1])[i])) } } cluster_labels = factor(cluster_labels) @@ -4326,7 +4344,7 @@ calculateWilcoxon <- function(object, compare.datasets = TRUE, clusters = NULL){ if(is.null(clusters)){ clusters = levels(cluster_labels) } else if(is.numeric(clusters) && compare.datasets){ - combinations <- expand.grid(clusters, unlist(attributes(ligex@scale.data)[1])) + combinations <- expand.grid(clusters, unlist(attributes(object@scale.data)[1])) for(i in 1:nrow(combinations)){ clusters[i] = toupper(paste0(combinations[i,1],"_",combinations[i,2])) } From 09bf17f3bdb597cf1d50d276d764c0f425cab520 Mon Sep 17 00:00:00 2001 From: Joshua Sodicoff <44811472+jsodicoff@users.noreply.github.com> Date: Fri, 9 Aug 2019 11:12:12 -0400 Subject: [PATCH 04/10] Added necessary import and exports for calcWilcoxon --- NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 50b863a2..f05627ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(calcAlignment) export(calcAlignmentPerCluster) export(calcDatasetSpecificity) export(calcPurity) +export(calcWilcoxon) export(clusterLouvainJaccard) export(convertOldLiger) export(createLiger) @@ -101,5 +102,6 @@ importFrom(mclust,adjustedRandIndex) importFrom(methods,"slot<-") importFrom(plyr,mapvalues) importFrom(plyr,rbind.fill.matrix) +importFrom(presto,wilcoxauc) importFrom(riverplot,makeRiver) useDynLib(liger) From 5a8fc7b5d8173d6f63e61b445bb318e5235ff381 Mon Sep 17 00:00:00 2001 From: Joshua Welch Date: Fri, 16 Aug 2019 08:36:47 -0400 Subject: [PATCH 05/10] Fixed incorrect function name in NAMESPACE exports --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index f05627ad..3ed2e584 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,7 @@ export(calcAlignment) export(calcAlignmentPerCluster) export(calcDatasetSpecificity) export(calcPurity) -export(calcWilcoxon) +export(calculateWilcoxon) export(clusterLouvainJaccard) export(convertOldLiger) export(createLiger) From 9236b6ce9af99d593eb40929a2f559b79704fce2 Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Tue, 27 Aug 2019 16:32:10 -0400 Subject: [PATCH 06/10] update: calculateWilcoxon, plotWilcoxon --- NAMESPACE | 7 +- R/liger.R | 183 ++++++++++++++++++++++++++++----------- man/calculateWilcoxon.Rd | 32 +++++++ man/liger.Rd | 6 ++ man/plotWilcoxon.Rd | 28 ++++++ man/runJaccard.Rd | 39 +++++++++ man/subsetWilcoxon.Rd | 39 +++++++++ 7 files changed, 281 insertions(+), 53 deletions(-) create mode 100644 man/calculateWilcoxon.Rd create mode 100644 man/plotWilcoxon.Rd create mode 100644 man/runJaccard.Rd create mode 100644 man/subsetWilcoxon.Rd diff --git a/NAMESPACE b/NAMESPACE index 3ed2e584..d7a8e040 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,5 @@ +# Generated by roxygen2: do not edit by hand + S3method(SNF,liger) S3method(SNF,list) S3method(optimizeALS,liger) @@ -35,11 +37,13 @@ export(plotGene) export(plotGeneLoadings) export(plotGeneViolin) export(plotGenes) +export(plotWilcoxon) export(plotWordClouds) export(quantileAlignSNF) export(quantile_norm) export(read10X) export(removeMissingObs) +export(runJaccard) export(runTSNE) export(runUMAP) export(scaleNotCenter) @@ -47,12 +51,11 @@ export(scaleNotCenter_sparse) export(selectGenes) export(seuratToLiger) export(subsetLiger) +export(subsetWilcoxon) export(suggestK) export(suggestLambda) exportClasses(liger) import(doSNOW) -import(Matrix) -import(snow) importFrom(FNN,get.knn) importFrom(FNN,get.knnx) importFrom(RANN.L1,nn2) diff --git a/R/liger.R b/R/liger.R index 95971f2f..e466be51 100644 --- a/R/liger.R +++ b/R/liger.R @@ -27,7 +27,8 @@ #' @slot snf List of values associated with shared nearest factor matrix for use in clustering and #' alignment (out.summary contains edge weight information between cell combinations) #' @slot agg.data Data aggregated within clusters -#' @slot wilcoxon Data frame of Wilcoxon test output +#' @slot wilcoxon.compare.datasets Data frame of Wilcoxon test output for comparing datasets +#' @slot wilcoxon.compare.clusters Data frame of Wilcoxon test output for comparing clusters #' @slot parameters List of parameters used throughout analysis #' @slot version Version of package used to create object #' @@ -55,7 +56,8 @@ liger <- methods::setClass( alignment.clusters = 'factor', clusters= "factor", agg.data = "list", - wilcoxon = "data.frame", + wilcoxon.compare.datasets = "data.frame", + wilcoxon.compare.clusters = "data.frame", parameters = "list", snf = 'list', version = 'ANY' @@ -4287,12 +4289,13 @@ runJaccard <- function(object){ return(object) } + #' Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. #' #' @param object \code{liger} object. Should run quantileAlignSNF first -#' @param compare.datasets Split clusters by dataset of origin to compare differences in expression +#' @param compare.datasets differential expression analysis for different datasets within in each cluster #' (default TRUE) -#' @param clusters A list of clusters to include for comparison (default NULL). +#' @param compare.clusters differential expression analysis for different clusters (default TRUE). #' #' @return Updated \code{liger} object. #' @export @@ -4305,68 +4308,146 @@ runJaccard <- function(object){ #' ligerex <- calculateWilcoxon(ligerex) #' } #' -#' Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. +calculateWilcoxon <- function(object, compare.datasets = TRUE, compare.clusters = TRUE){ + if (!require("presto", quietly = TRUE)) { + print("Package \"presto\" needed to perform fast Wilcoxon rank sum test. Installation started.") + library(devtools) + install_github('immunogenomics/presto') + } + + clusterIDs = levels(object@clusters) # unique cluster labels + cell_clusterID = as.vector(object@clusters) # cluster label for each cell + cell_ID = rownames(as.data.frame(object@clusters)) # cell label + source_name = attributes(object@scale.data)$names # label of different datasets + + num_source = length(source_name) # number of different datasets + num_clusters = length(clusterIDs) # number of clusters + + all_seq = c() + cell_source = c() + for (i in 1:num_source){ + all_seq = rbind(all_seq, object@scale.data[[i]]) + cell_source = c(cell_source, rep(source_name[i], nrow(object@scale.data[[i]]))) + } + cell_info = as.data.frame(cbind(cell_ID,cell_source,cell_clusterID)) + names(cell_info) = c("cell_id","source","cluster_id") + + + ### compare datasets and/or clusters + if (compare.datasets == TRUE) { + wilcoxon_compare_datasets = c() # save all the test results + + for (i in 1:length(clusterIDs)){ + cluster_i = cell_info[cell_info$cluster_id == clusterIDs[i], ] + seq_i = t(all_seq[rownames(all_seq) %in% cluster_i$cell_id, ]) # gene by cell matrix + wilcoxon_cluster_i = wilcoxauc(seq_i, cluster_i$source) + wilcoxon_cluster_i = cbind(rep(clusterIDs[i], nrow(wilcoxon_cluster_i)), wilcoxon_cluster_i) + wilcoxon_compare_datasets = rbind(wilcoxon_compare_datasets, wilcoxon_cluster_i) + } + + names(wilcoxon_compare_datasets)[1] = "cluster" + object@wilcoxon.compare.datasets = wilcoxon_compare_datasets + } + + if (compare.clusters == TRUE) { + wilcoxon_compare_clusters = c() # save all the test results + + for (i in 1:num_clusters){ + cluster_i_others = cell_info + cluster_i_others[cluster_i_others$cluster_id != clusterIDs[i], ]$cluster_id == "others" + seq_ij = t(all_seq) # gene by cell matrix + wilcoxon_cluster_i = wilcoxauc(seq_ij, cluster_i_others$cluster_id) + wilcoxon_cluster_i = wilcoxon_cluster_i[wilcoxon_cluster_i$group == clusterIDs[i], ] + wilcoxon_compare_clusters = rbind(wilcoxon_compare_clusters, wilcoxon_cluster_i) + } + object@wilcoxon.compare.clusters = wilcoxon_compare_clusters + } + return(object) +} + + + +#' Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. #' -#' @param object \code{liger} object. Should run quantileAlignSNF first -#' @param compare.datasets Split clusters by dataset of origin to compare differences in expression -#' (default TRUE) -#' @param clusters A list of clusters to include for comparison (default NULL). -#' +#' @param object \code{liger} object. After \code{calculateWilcoxon} is completed. +#' @param num_top number of top expressed genes to be displayed (default=10) #' @return Updated \code{liger} object. #' @export #' @importFrom presto wilcoxauc #' @examples #' \dontrun{ -#' # liger object, clustering complete +#' # liger object, Wilcoxon rank sum test complete #' ligerex #' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test -#' ligerex <- calculateWilcoxon(ligerex) +#' p <- plotWilcoxon(ligerex) #' } #' -calculateWilcoxon <- function(object, compare.datasets = TRUE, clusters = NULL){ - if (!require("presto", quietly = TRUE)) { - stop("Package \"presto\" needed for this function to perform fast Wilcoxon rank sum test. Please install it.", - call. = FALSE - ) +plotWilcoxon <- function(object, num_top = 10){ + clusterIDs = levels(object@clusters) + num_clusters = length(clusterIDs) + clusters = list() + top_genes = c() # store signature genes for each cluster + + for (i in 1:num_clusters){ + clusters[[i]] = wilcoxon_compare_clusters[wilcoxon_compare_clusters$group == clusterIDs[i], ] + clusters[[i]] = clusters[[i]][order(clusters[[i]]$padj), ] + names(clusters[[i]])[3] = paste("cluster_",clusterIDs[i], sep = "") + top_genes = union(top_genes, clusters[[i]][1:num_top, "feature"]) } + + top_genes = data.frame(feature = top_genes) + + clusters_expression = merge(top_genes, clusters[[1]][ ,c("feature", paste("cluster_","0", sep = ""))], by = "feature", by.x = T, sort=F) - cluster_labels <- as.vector(object@clusters) - names(cluster_labels) <- rownames(as.data.frame(object@clusters)) - if(compare.datasets == TRUE){ - for(i in 1:length(object@scale.data)){ - cluster_labels[rownames(object@scale.data[[i]])] <- - toupper(paste0(cluster_labels[rownames(object@scale.data[[i]])],"_",unlist(attributes(object@scale.data)[1])[i])) - } + for (i in 2:num_clusters){ + clusters_expression = merge(clusters_expression, clusters[[i]][ ,c("feature", paste("cluster_",clusterIDs[i], sep = ""))], + by = "feature", sort=F, no.dups = F) } - cluster_labels = factor(cluster_labels) - - #if clusters null, do for all, if cluster does not exist throw error - if(is.null(clusters)){ - clusters = levels(cluster_labels) - } else if(is.numeric(clusters) && compare.datasets){ - combinations <- expand.grid(clusters, unlist(attributes(object@scale.data)[1])) - for(i in 1:nrow(combinations)){ - clusters[i] = toupper(paste0(combinations[i,1],"_",combinations[i,2])) - } - } else if(length(clusters)==1){ - stop("Wilcoxon Rank Sum Test cannot be completed for one cluster. Please include 2 or more clusters.", - call = FALSE) - } else if(length(union(cluster_labels, clusters)) > length(unique(cluster_labels))){ - stop("Selected clusters do not match clusters in data. Please limit your list to clusters in the data.", - call = FALSE) - } - expression_mat = t(object@scale.data[[1]]) - for (i in 2:length(object@scale.data)){ - expression_mat <- cbind(expression_mat, t(object@scale.data[[i]])) - } - cluster_labels <- cluster_labels[cluster_labels %in% clusters] - expression_mat <- expression_mat[,names(cluster_labels)] - - object@wilcoxon <- wilcoxauc(expression_mat, cluster_labels) - - return(object) + + expr_mat = as.matrix(clusters_expression[,-1]) + rownames(expr_mat) = top_genes$feature + + # Josh O'Brien @ http://stackoverflow.com/questions/15505607 + draw_colnames_45 = function (coln, gaps, ...) { + coord = pheatmap:::find_coordinates(length(coln), gaps) + x = coord$coord - 0.5 * coord$size + res = grid::textGrob(coln, x = x, y = unit(1, "npc") - unit(3,"bigpts"), + vjust = 0.75, hjust = 1, rot = 45, gp = grid::gpar(...) + ) + return(res) + } + + assignInNamespace( + x = "draw_colnames", + value = "draw_colnames_45", + ns = asNamespace("pheatmap") + ) + + quantile_breaks <- function(mat, n = 10) { + breaks <- quantile(mat, probs = seq(0, 1, length.out = n)) + breaks[!duplicated(breaks)] + } + + mat_breaks <- quantile_breaks(expr_mat, n = 30) + + + + pheatmap(expr_mat, + cutree_cols = num_clusters, + breaks = mat_breaks, + #cluster_rows = FALSE, + #cluster_cols = FALSE, + treeheight_row = 0, + treeheight_col = 0, + color = inferno(30), + #cex = 1.5, + legend = T, + fontsize = 8, + main = "Gene expression comparison among clusters") } + + #' Subset a dataframe produced by calculateWilcox by feature, dataset, or column #' #' @param object \code{liger} object. Should run calculateWilcox first diff --git a/man/calculateWilcoxon.Rd b/man/calculateWilcoxon.Rd new file mode 100644 index 00000000..65e71a5d --- /dev/null +++ b/man/calculateWilcoxon.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{calculateWilcoxon} +\alias{calculateWilcoxon} +\title{Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test.} +\usage{ +calculateWilcoxon(object, compare.datasets = TRUE, + compare.clusters = TRUE) +} +\arguments{ +\item{object}{\code{liger} object. Should run quantileAlignSNF first} + +\item{compare.datasets}{differential expression analysis for different datasets within in each cluster +(default TRUE)} + +\item{compare.clusters}{differential expression analysis for different clusters (default TRUE).} +} +\value{ +Updated \code{liger} object. +} +\description{ +Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. +} +\examples{ +\dontrun{ +# liger object, clustering complete +ligerex +# fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +ligerex <- calculateWilcoxon(ligerex) +} + +} diff --git a/man/liger.Rd b/man/liger.Rd index 3f2e30ca..183d635b 100644 --- a/man/liger.Rd +++ b/man/liger.Rd @@ -38,6 +38,8 @@ matrix)} \item{\code{V}}{Dataset-specific gene loading factors (one matrix per dataset, dimensions k by genes)} +\item{\code{jaccard}}{Holds Jaccard matrices for use in Louvain-Jaccard clustering and imputation} + \item{\code{tsne.coords}}{Matrix of 2D coordinates obtained from running t-SNE on H.norm or H matrices} \item{\code{alignment.clusters}}{Initial joint cluster assignments from shared factor alignment} @@ -49,6 +51,10 @@ alignment (out.summary contains edge weight information between cell combination \item{\code{agg.data}}{Data aggregated within clusters} +\item{\code{wilcoxon.compare.datasets}}{Data frame of Wilcoxon test output for comparing datasets} + +\item{\code{wilcoxon.compare.clusters}}{Data frame of Wilcoxon test output for comparing clusters} + \item{\code{parameters}}{List of parameters used throughout analysis} \item{\code{version}}{Version of package used to create object} diff --git a/man/plotWilcoxon.Rd b/man/plotWilcoxon.Rd new file mode 100644 index 00000000..c7562c3a --- /dev/null +++ b/man/plotWilcoxon.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{plotWilcoxon} +\alias{plotWilcoxon} +\title{Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster.} +\usage{ +plotWilcoxon(object, num_top = 10) +} +\arguments{ +\item{object}{\code{liger} object. After \code{calculateWilcoxon} is completed.} + +\item{num_top}{number of top expressed genes to be displayed (default=10)} +} +\value{ +Updated \code{liger} object. +} +\description{ +Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. +} +\examples{ +\dontrun{ +# liger object, Wilcoxon rank sum test complete +ligerex +# fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +p <- plotWilcoxon(ligerex) +} + +} diff --git a/man/runJaccard.Rd b/man/runJaccard.Rd new file mode 100644 index 00000000..4d58c30b --- /dev/null +++ b/man/runJaccard.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{runJaccard} +\alias{runJaccard} +\title{Make Jaccard matrix} +\usage{ +runJaccard(object) + +runJaccard(object) +} +\arguments{ +\item{object}{\code{liger} object. Should run scaleNotCenter first} + +\item{object}{\code{liger} object. Should run scaleNotCenter first} +} +\value{ +Updated \code{liger} object. + +Updated \code{liger} object. +} +\description{ +Make Jaccard matrix + +Make Jaccard matrix (will have use when imputation is implemented) +} +\examples{ +\dontrun{ +# liger object, after running scaleNotCenter +ligerex +# add Jaccard matrix to a list for each dataset +ligerex <- runJaccard(ligerex) +} +\dontrun{ +# liger object, after running scaleNotCenter +ligerex +# add Jaccard matrix to a list for each dataset +ligerex <- runJaccard(ligerex) +} +} diff --git a/man/subsetWilcoxon.Rd b/man/subsetWilcoxon.Rd new file mode 100644 index 00000000..829d19c3 --- /dev/null +++ b/man/subsetWilcoxon.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{subsetWilcoxon} +\alias{subsetWilcoxon} +\title{Subset a dataframe produced by calculateWilcox by feature, dataset, or column} +\usage{ +subsetWilcoxon(object, features = NULL, datasets = NULL, + columns = NULL, return.subset = FALSE, save.subset = TRUE) +} +\arguments{ +\item{object}{\code{liger} object. Should run calculateWilcox first} + +\item{features}{A list of features to include in the subset (default NULL)} + +\item{datasets}{A list of datasets to include in the subset (defualt NULL).} + +\item{columns}{A list of columns to include in the subset (default NULL).} + +\item{return.subset}{Return the subsetted dataframe instead of a liger object (default FALSE)} + +\item{save.subset}{Replace the unsubset dataframe with the subset (default TRUE)} +} +\value{ +Updated \code{liger} object. +} +\description{ +Subset a dataframe produced by calculateWilcox by feature, dataset, or column +} +\examples{ +\dontrun{ +# liger object, after running CalculateWilcoxon +ligerex +# create lists of features, datasets, and columns to subset by +genes = c("UBBP4","KIAA1715","TMEM50B") +datasets = c("tenx") +# +ligerex <- subsetWilcoxon(ligerex, features = genes, datasets = datasets) +} +} From 5c8f17786d5369b34450a228a890151d1a54c2e7 Mon Sep 17 00:00:00 2001 From: cgao90 Date: Wed, 28 Aug 2019 00:36:01 -0400 Subject: [PATCH 07/10] add import(matrix), import(snow), imports of viridis, pheatmap --- DESCRIPTION | 7 +- NAMESPACE | 7 +- R/liger.R | 159 +++++++++++++++++++++++--------------------- man/plotWilcoxon.Rd | 2 +- 4 files changed, 93 insertions(+), 82 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 14445cc4..4185e33f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,14 +28,17 @@ Imports: Rcpp (>= 0.12.10), snow, doSNOW, mclust, - patchwork + patchwork, + viridis, + pheatmap Remotes: thomasp85/patchwork LinkingTo: Rcpp, RcppArmadillo Depends: cowplot, Matrix, methods, - patchwork + patchwork, + grid RoxygenNote: 6.1.1 Suggests: Seurat, diff --git a/NAMESPACE b/NAMESPACE index d7a8e040..7fd69949 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,3 @@ -# Generated by roxygen2: do not edit by hand - S3method(SNF,liger) S3method(SNF,list) S3method(optimizeALS,liger) @@ -56,6 +54,8 @@ export(suggestK) export(suggestLambda) exportClasses(liger) import(doSNOW) +import(Matrix) +import(snow) importFrom(FNN,get.knn) importFrom(FNN,get.knnx) importFrom(RANN.L1,nn2) @@ -99,6 +99,7 @@ importFrom(ggrepel,geom_text_repel) importFrom(grid,gpar) importFrom(grid,roundrectGrob) importFrom(grid,unit) +importFrom(grid,textGrob) importFrom(ica,icafast) importFrom(irlba,prcomp_irlba) importFrom(mclust,adjustedRandIndex) @@ -107,4 +108,6 @@ importFrom(plyr,mapvalues) importFrom(plyr,rbind.fill.matrix) importFrom(presto,wilcoxauc) importFrom(riverplot,makeRiver) +importFrom(viridis, inferno) +importFrom(pheatmap, pheatmap) useDynLib(liger) diff --git a/R/liger.R b/R/liger.R index e466be51..ce7485cd 100644 --- a/R/liger.R +++ b/R/liger.R @@ -3793,6 +3793,88 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh return(output_list) } + + +#' Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. +#' +#' @param object \code{liger} object. After \code{calculateWilcoxon} is completed. +#' @param num_top number of top expressed genes to be displayed (default=10) +#' @return a heatmap comparing the average expression values of selected top genes. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, Wilcoxon rank sum test complete +#' ligerex +#' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +#' p <- plotWilcoxon(ligerex) +#' } +#' +plotWilcoxon <- function(object, num_top = 10){ + clusterIDs = levels(object@clusters) + num_clusters = length(clusterIDs) + clusters = list() + top_genes = c() # store signature genes for each cluster + + for (i in 1:num_clusters){ + clusters[[i]] = object@wilcoxon.compare.clusters[object@wilcoxon.compare.clusters$group == clusterIDs[i], ] + clusters[[i]] = clusters[[i]][order(clusters[[i]]$padj), ] + names(clusters[[i]])[3] = paste("cluster_",clusterIDs[i], sep = "") + top_genes = union(top_genes, clusters[[i]][1:num_top, "feature"]) + } + + top_genes = data.frame(feature = top_genes) + + clusters_expression = merge(top_genes, clusters[[1]][ ,c("feature", paste("cluster_","0", sep = ""))], by = "feature", by.x = T, sort=F) + + for (i in 2:num_clusters){ + clusters_expression = merge(clusters_expression, clusters[[i]][ ,c("feature", paste("cluster_",clusterIDs[i], sep = ""))], + by = "feature", sort=F) + } + + expr_mat = as.matrix(clusters_expression[,-1]) + rownames(expr_mat) = top_genes$feature + + mat_breaks = quantile_breaks(expr_mat, n = 50) + + assignInNamespace( + x = "draw_colnames", + value = "liger:::draw_colnames_45", + ns = asNamespace("pheatmap") + ) + + pheatmap(expr_mat, + cutree_cols = num_clusters, + breaks = mat_breaks, + treeheight_row = 0, + treeheight_col = 0, + color = inferno(n=50, direction = -1), + legend = T, + fontsize = 8, + main = "Expression (Avg) of Selected Genes") + +} + +# A function used to set up the plotWilcoxon +# Credits to Josh O'Brien @ http://stackoverflow.com/questions/15505607 + +draw_colnames_45 <- function (coln, gaps,...) { + coord = pheatmap:::find_coordinates(length(coln), gaps) + x = coord$coord - 0.5 * coord$size + res = grid::textGrob(coln, x = x, y = grid::unit(1, "npc") - grid::unit(3,"bigpts"), + vjust = 0.75, hjust = 1, rot = 45, gp = grid::gpar(...) + ) + return(res) +} + + + +quantile_breaks <- function(mat, n = 10) { + breaks = quantile(mat, probs = seq(0, 1, length.out = n)) + breaks[!duplicated(breaks)] +} + + ####################################################################################### #### Conversion/Transformation @@ -4367,84 +4449,7 @@ calculateWilcoxon <- function(object, compare.datasets = TRUE, compare.clusters -#' Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. -#' -#' @param object \code{liger} object. After \code{calculateWilcoxon} is completed. -#' @param num_top number of top expressed genes to be displayed (default=10) -#' @return Updated \code{liger} object. -#' @export -#' @importFrom presto wilcoxauc -#' @examples -#' \dontrun{ -#' # liger object, Wilcoxon rank sum test complete -#' ligerex -#' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test -#' p <- plotWilcoxon(ligerex) -#' } -#' -plotWilcoxon <- function(object, num_top = 10){ - clusterIDs = levels(object@clusters) - num_clusters = length(clusterIDs) - clusters = list() - top_genes = c() # store signature genes for each cluster - for (i in 1:num_clusters){ - clusters[[i]] = wilcoxon_compare_clusters[wilcoxon_compare_clusters$group == clusterIDs[i], ] - clusters[[i]] = clusters[[i]][order(clusters[[i]]$padj), ] - names(clusters[[i]])[3] = paste("cluster_",clusterIDs[i], sep = "") - top_genes = union(top_genes, clusters[[i]][1:num_top, "feature"]) - } - - top_genes = data.frame(feature = top_genes) - - clusters_expression = merge(top_genes, clusters[[1]][ ,c("feature", paste("cluster_","0", sep = ""))], by = "feature", by.x = T, sort=F) - - for (i in 2:num_clusters){ - clusters_expression = merge(clusters_expression, clusters[[i]][ ,c("feature", paste("cluster_",clusterIDs[i], sep = ""))], - by = "feature", sort=F, no.dups = F) - } - - expr_mat = as.matrix(clusters_expression[,-1]) - rownames(expr_mat) = top_genes$feature - - # Josh O'Brien @ http://stackoverflow.com/questions/15505607 - draw_colnames_45 = function (coln, gaps, ...) { - coord = pheatmap:::find_coordinates(length(coln), gaps) - x = coord$coord - 0.5 * coord$size - res = grid::textGrob(coln, x = x, y = unit(1, "npc") - unit(3,"bigpts"), - vjust = 0.75, hjust = 1, rot = 45, gp = grid::gpar(...) - ) - return(res) - } - - assignInNamespace( - x = "draw_colnames", - value = "draw_colnames_45", - ns = asNamespace("pheatmap") - ) - - quantile_breaks <- function(mat, n = 10) { - breaks <- quantile(mat, probs = seq(0, 1, length.out = n)) - breaks[!duplicated(breaks)] - } - - mat_breaks <- quantile_breaks(expr_mat, n = 30) - - - - pheatmap(expr_mat, - cutree_cols = num_clusters, - breaks = mat_breaks, - #cluster_rows = FALSE, - #cluster_cols = FALSE, - treeheight_row = 0, - treeheight_col = 0, - color = inferno(30), - #cex = 1.5, - legend = T, - fontsize = 8, - main = "Gene expression comparison among clusters") -} diff --git a/man/plotWilcoxon.Rd b/man/plotWilcoxon.Rd index c7562c3a..d29b7c0f 100644 --- a/man/plotWilcoxon.Rd +++ b/man/plotWilcoxon.Rd @@ -12,7 +12,7 @@ plotWilcoxon(object, num_top = 10) \item{num_top}{number of top expressed genes to be displayed (default=10)} } \value{ -Updated \code{liger} object. +a heatmap comparing the average expression values of selected top genes. } \description{ Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. From 3aa0d837e60afacf86add8c223d5d25f816a34e9 Mon Sep 17 00:00:00 2001 From: cgao90 Date: Wed, 28 Aug 2019 22:43:10 -0400 Subject: [PATCH 08/10] fix on draw_colnames_45 --- DESCRIPTION | 3 +-- R/liger.R | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4185e33f..3d250dce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,8 +37,7 @@ Depends: cowplot, Matrix, methods, - patchwork, - grid + patchwork RoxygenNote: 6.1.1 Suggests: Seurat, diff --git a/R/liger.R b/R/liger.R index ce7485cd..a86c4467 100644 --- a/R/liger.R +++ b/R/liger.R @@ -3839,7 +3839,7 @@ plotWilcoxon <- function(object, num_top = 10){ assignInNamespace( x = "draw_colnames", - value = "liger:::draw_colnames_45", + value = draw_colnames_45, ns = asNamespace("pheatmap") ) @@ -3851,7 +3851,7 @@ plotWilcoxon <- function(object, num_top = 10){ color = inferno(n=50, direction = -1), legend = T, fontsize = 8, - main = "Expression (Avg) of Selected Genes") + main = "Average Expression Value (Scaled) of Selected Genes") } @@ -3868,7 +3868,7 @@ draw_colnames_45 <- function (coln, gaps,...) { } - +# A function used to set up the breaks in plotWilcoxon quantile_breaks <- function(mat, n = 10) { breaks = quantile(mat, probs = seq(0, 1, length.out = n)) breaks[!duplicated(breaks)] From 10722bbe7a1418eb068e2c4b41ebb634c60af1bb Mon Sep 17 00:00:00 2001 From: Joshua Sodicoff <44811472+jsodicoff@users.noreply.github.com> Date: Fri, 24 Jan 2020 17:46:50 -0500 Subject: [PATCH 09/10] Update NAMESPACE --- NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 4e077dc2..edddcf39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,7 +67,6 @@ importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) importFrom(Matrix,sparseMatrix) importFrom(Matrix,t) -importFrom(RANN.L1,nn2) importFrom(Rcpp,evalCpp) importFrom(Rtsne,Rtsne) importFrom(cowplot,plot_grid) From 84de5b3df4b6c44f377dfa3eeaffba9ddfb5fbd7 Mon Sep 17 00:00:00 2001 From: Joshua Sodicoff <44811472+jsodicoff@users.noreply.github.com> Date: Fri, 24 Jan 2020 17:47:42 -0500 Subject: [PATCH 10/10] Update DESCRIPTION --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 95298ee0..25ba7b85 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ Imports: Rcpp (>= 0.12.10), cowplot, FNN, dplyr, - RANN.L1, grid, ggrepel, irlba, @@ -30,7 +29,7 @@ Imports: Rcpp (>= 0.12.10), mclust, patchwork, viridis, - pheatmap + pheatmap, stats Remotes: thomasp85/patchwork LinkingTo: Rcpp, RcppArmadillo