diff --git a/DESCRIPTION b/DESCRIPTION index cae8d8b8..25ba7b85 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ Imports: Rcpp (>= 0.12.10), cowplot, FNN, dplyr, - RANN.L1, grid, ggrepel, irlba, @@ -29,6 +28,8 @@ Imports: Rcpp (>= 0.12.10), doSNOW, mclust, patchwork, + viridis, + pheatmap, stats Remotes: thomasp85/patchwork LinkingTo: Rcpp, RcppArmadillo diff --git a/NAMESPACE b/NAMESPACE index 87f5d363..edddcf39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(calcAlignment) export(calcAlignmentPerCluster) export(calcDatasetSpecificity) export(calcPurity) +export(calculateWilcoxon) export(clusterLouvainJaccard) export(convertOldLiger) export(createLiger) @@ -39,12 +40,13 @@ export(plotGene) export(plotGeneLoadings) export(plotGeneViolin) export(plotGenes) +export(plotWilcoxon) export(plotWordClouds) export(quantileAlignSNF) export(quantile_norm) export(read10X) export(removeMissingObs) -export(reorganizeLiger) +export(runJaccard) export(runTSNE) export(runUMAP) export(scaleNotCenter) @@ -52,6 +54,7 @@ export(scaleNotCenter_sparse) export(selectGenes) export(seuratToLiger) export(subsetLiger) +export(subsetWilcoxon) export(suggestK) export(suggestLambda) exportClasses(liger) @@ -64,7 +67,6 @@ importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) importFrom(Matrix,sparseMatrix) importFrom(Matrix,t) -importFrom(RANN.L1,nn2) importFrom(Rcpp,evalCpp) importFrom(Rtsne,Rtsne) importFrom(cowplot,plot_grid) @@ -109,13 +111,17 @@ importFrom(ggrepel,geom_text_repel) importFrom(grid,gpar) importFrom(grid,roundrectGrob) importFrom(grid,unit) +importFrom(grid,textGrob) importFrom(ica,icafast) importFrom(irlba,prcomp_irlba) importFrom(mclust,adjustedRandIndex) importFrom(methods,"slot<-") importFrom(plyr,mapvalues) importFrom(plyr,rbind.fill.matrix) +importFrom(presto,wilcoxauc) importFrom(riverplot,makeRiver) +importFrom(viridis, inferno) +importFrom(pheatmap, pheatmap) importFrom(riverplot,riverplot) importFrom(snow,makeCluster) importFrom(snow,stopCluster) diff --git a/R/liger.R b/R/liger.R index 61d4e2bf..7f7fd86b 100644 --- a/R/liger.R +++ b/R/liger.R @@ -22,12 +22,15 @@ NULL #' matrix) #' @slot W Shared gene loading factors (k by genes) #' @slot V Dataset-specific gene loading factors (one matrix per dataset, dimensions k by genes) +#' @slot jaccard Holds Jaccard matrices for use in Louvain-Jaccard clustering and imputation #' @slot tsne.coords Matrix of 2D coordinates obtained from running t-SNE on H.norm or H matrices #' @slot alignment.clusters Initial joint cluster assignments from shared factor alignment #' @slot clusters Joint cluster assignments for cells #' @slot snf List of values associated with shared nearest factor matrix for use in clustering and #' alignment (out.summary contains edge weight information between cell combinations) #' @slot agg.data Data aggregated within clusters +#' @slot wilcoxon.compare.datasets Data frame of Wilcoxon test output for comparing datasets +#' @slot wilcoxon.compare.clusters Data frame of Wilcoxon test output for comparing clusters #' @slot parameters List of parameters used throughout analysis #' @slot version Version of package used to create object #' @@ -50,10 +53,13 @@ liger <- methods::setClass( H.norm = "matrix", W = "matrix", V = "list", + jaccard = "list", tsne.coords = "matrix", alignment.clusters = 'factor', clusters= "factor", agg.data = "list", + wilcoxon.compare.datasets = "data.frame", + wilcoxon.compare.clusters = "data.frame", parameters = "list", snf = 'list', version = 'ANY' @@ -3408,7 +3414,8 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' ligerex <- runTSNE(ligerex) #' # plot expression for CD4 and return plots #' gene_plots <- plotGene(ligerex, "CD4", return.plots = T) -#' } +#' } + plotGene <- function(object, gene, use.raw = F, use.scaled = F, scale.by = 'dataset', log2scale = NULL, methylation.indices = NULL, plot.by = 'dataset', @@ -4096,6 +4103,88 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh return(output_list) } + + +#' Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. +#' +#' @param object \code{liger} object. After \code{calculateWilcoxon} is completed. +#' @param num_top number of top expressed genes to be displayed (default=10) +#' @return a heatmap comparing the average expression values of selected top genes. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, Wilcoxon rank sum test complete +#' ligerex +#' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +#' p <- plotWilcoxon(ligerex) +#' } +#' +plotWilcoxon <- function(object, num_top = 10){ + clusterIDs = levels(object@clusters) + num_clusters = length(clusterIDs) + clusters = list() + top_genes = c() # store signature genes for each cluster + + for (i in 1:num_clusters){ + clusters[[i]] = object@wilcoxon.compare.clusters[object@wilcoxon.compare.clusters$group == clusterIDs[i], ] + clusters[[i]] = clusters[[i]][order(clusters[[i]]$padj), ] + names(clusters[[i]])[3] = paste("cluster_",clusterIDs[i], sep = "") + top_genes = union(top_genes, clusters[[i]][1:num_top, "feature"]) + } + + top_genes = data.frame(feature = top_genes) + + clusters_expression = merge(top_genes, clusters[[1]][ ,c("feature", paste("cluster_","0", sep = ""))], by = "feature", by.x = T, sort=F) + + for (i in 2:num_clusters){ + clusters_expression = merge(clusters_expression, clusters[[i]][ ,c("feature", paste("cluster_",clusterIDs[i], sep = ""))], + by = "feature", sort=F) + } + + expr_mat = as.matrix(clusters_expression[,-1]) + rownames(expr_mat) = top_genes$feature + + mat_breaks = quantile_breaks(expr_mat, n = 50) + + assignInNamespace( + x = "draw_colnames", + value = draw_colnames_45, + ns = asNamespace("pheatmap") + ) + + pheatmap(expr_mat, + cutree_cols = num_clusters, + breaks = mat_breaks, + treeheight_row = 0, + treeheight_col = 0, + color = inferno(n=50, direction = -1), + legend = T, + fontsize = 8, + main = "Average Expression Value (Scaled) of Selected Genes") + +} + +# A function used to set up the plotWilcoxon +# Credits to Josh O'Brien @ http://stackoverflow.com/questions/15505607 + +draw_colnames_45 <- function (coln, gaps,...) { + coord = pheatmap:::find_coordinates(length(coln), gaps) + x = coord$coord - 0.5 * coord$size + res = grid::textGrob(coln, x = x, y = grid::unit(1, "npc") - grid::unit(3,"bigpts"), + vjust = 0.75, hjust = 1, rot = 45, gp = grid::gpar(...) + ) + return(res) +} + + +# A function used to set up the breaks in plotWilcoxon +quantile_breaks <- function(mat, n = 10) { + breaks = quantile(mat, probs = seq(0, 1, length.out = n)) + breaks[!duplicated(breaks)] +} + + ####################################################################################### #### Conversion/Transformation @@ -4614,3 +4703,203 @@ convertOldLiger = function(object, override.raw = F) { print(paste0('New slots not filled: ', setdiff(slots_new[slots_new != "cell.data"], slots))) return(new.liger) } + +#' Make Jaccard matrix +#' +#' @param object \code{liger} object. Should run scaleNotCenter first +#' +#' @return Updated \code{liger} object. +#' @export +#' @examples +#' \dontrun{ +#' # liger object, after running scaleNotCenter +#' ligerex +#' # add Jaccard matrix to a list for each dataset +#' ligerex <- runJaccard(ligerex) +#' } +runJaccard <- function(object){ + n = 1 + for(i in object@scale.data){ + i[i!=0] <- 1 + common_values <- i %*% t(i) + nonzero_value_index <- which(common_values > 0, arr.ind=TRUE) + nonzero_value <- common_values[nonzero_value_index] + row_sums <- rowSums(t(i)) + jaccard <- nonzero_value/(row_sums[nonzero_value_index[,1]] + +row_sums[nonzero_value_index[,1]] - nonzero_value) + object@jaccard[[n]] = sparseMatrix(i = nonzero_value_index[,1], + j = nonzero_value_index[,2], + x = jaccard, dims = dim(common_values)) + for(j in 1:nrow(object@jaccard[[n]])){ + object@jaccard[[n]][j,j] = 0 + } + object@jaccard[[n]][is.na(object@jaccard[[n]])] <- 0 + n = n + 1 + } + return(object) +} + + +#' Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. +#' +#' @param object \code{liger} object. Should run quantileAlignSNF first +#' @param compare.datasets differential expression analysis for different datasets within in each cluster +#' (default TRUE) +#' @param compare.clusters differential expression analysis for different clusters (default TRUE). +#' +#' @return Updated \code{liger} object. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, clustering complete +#' ligerex +#' # fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +#' ligerex <- calculateWilcoxon(ligerex) +#' } +#' +calculateWilcoxon <- function(object, compare.datasets = TRUE, compare.clusters = TRUE){ + if (!require("presto", quietly = TRUE)) { + print("Package \"presto\" needed to perform fast Wilcoxon rank sum test. Installation started.") + library(devtools) + install_github('immunogenomics/presto') + } + + clusterIDs = levels(object@clusters) # unique cluster labels + cell_clusterID = as.vector(object@clusters) # cluster label for each cell + cell_ID = rownames(as.data.frame(object@clusters)) # cell label + source_name = attributes(object@scale.data)$names # label of different datasets + + num_source = length(source_name) # number of different datasets + num_clusters = length(clusterIDs) # number of clusters + + all_seq = c() + cell_source = c() + for (i in 1:num_source){ + all_seq = rbind(all_seq, object@scale.data[[i]]) + cell_source = c(cell_source, rep(source_name[i], nrow(object@scale.data[[i]]))) + } + cell_info = as.data.frame(cbind(cell_ID,cell_source,cell_clusterID)) + names(cell_info) = c("cell_id","source","cluster_id") + + + ### compare datasets and/or clusters + if (compare.datasets == TRUE) { + wilcoxon_compare_datasets = c() # save all the test results + + for (i in 1:length(clusterIDs)){ + cluster_i = cell_info[cell_info$cluster_id == clusterIDs[i], ] + seq_i = t(all_seq[rownames(all_seq) %in% cluster_i$cell_id, ]) # gene by cell matrix + wilcoxon_cluster_i = wilcoxauc(seq_i, cluster_i$source) + wilcoxon_cluster_i = cbind(rep(clusterIDs[i], nrow(wilcoxon_cluster_i)), wilcoxon_cluster_i) + wilcoxon_compare_datasets = rbind(wilcoxon_compare_datasets, wilcoxon_cluster_i) + } + + names(wilcoxon_compare_datasets)[1] = "cluster" + object@wilcoxon.compare.datasets = wilcoxon_compare_datasets + } + + if (compare.clusters == TRUE) { + wilcoxon_compare_clusters = c() # save all the test results + + for (i in 1:num_clusters){ + cluster_i_others = cell_info + cluster_i_others[cluster_i_others$cluster_id != clusterIDs[i], ]$cluster_id == "others" + seq_ij = t(all_seq) # gene by cell matrix + wilcoxon_cluster_i = wilcoxauc(seq_ij, cluster_i_others$cluster_id) + wilcoxon_cluster_i = wilcoxon_cluster_i[wilcoxon_cluster_i$group == clusterIDs[i], ] + wilcoxon_compare_clusters = rbind(wilcoxon_compare_clusters, wilcoxon_cluster_i) + } + object@wilcoxon.compare.clusters = wilcoxon_compare_clusters + } + return(object) +} + + + + + + + +#' Subset a dataframe produced by calculateWilcox by feature, dataset, or column +#' +#' @param object \code{liger} object. Should run calculateWilcox first +#' @param features A list of features to include in the subset (default NULL) +#' @param datasets A list of datasets to include in the subset (defualt NULL). +#' @param columns A list of columns to include in the subset (default NULL). +#' @param return.subset Return the subsetted dataframe instead of a liger object (default FALSE) +#' @param save.subset Replace the unsubset dataframe with the subset (default TRUE) +#' +#' @return Updated \code{liger} object. +#' @export +#' @importFrom presto wilcoxauc +#' @examples +#' \dontrun{ +#' # liger object, after running CalculateWilcoxon +#' ligerex +#' # create lists of features, datasets, and columns to subset by +#' genes = c("UBBP4","KIAA1715","TMEM50B") +#' datasets = c("tenx") +#' # +#' ligerex <- subsetWilcoxon(ligerex, features = genes, datasets = datasets) +#' } +subsetWilcoxon <- function(object, features = NULL, datasets = NULL, columns = NULL, + return.subset = FALSE, save.subset = TRUE){ + if(!return.subset && !save.subset){ + return(object) + } + if(!is.null(columns)){ + columns = c("feature", "group", columns) + wilcoxon = object@wilcoxon[columns] + } + if(!is.null(features)){ + wilcoxon = object@wilcoxon[object@wilcoxon$feature == toupper(features)] + } + if(!is.null(datasets)){ + for(name in datasets){ + wilcoxon = object@wilcoxon[grepl(toupper(name), object@wilcoxon$group, fixed = TRUE)] + } + } + if(save.subset){ + object@wilcoxon = wilcoxon + } + if(return.subset){ + return(wilcoxon) + } + return(object) +} + +#' Make Jaccard matrix (will have use when imputation is implemented) +#' +#' @param object \code{liger} object. Should run scaleNotCenter first +#' +#' @return Updated \code{liger} object. +#' @export +#' @examples +#' \dontrun{ +#' # liger object, after running scaleNotCenter +#' ligerex +#' # add Jaccard matrix to a list for each dataset +#' ligerex <- runJaccard(ligerex) +#' } +runJaccard <- function(object){ + n = 1 + for(i in object@scale.data){ + i[i!=0] <- 1 + common_values <- i %*% t(i) + nonzero_value_index <- which(common_values > 0, arr.ind=TRUE) + nonzero_value <- common_values[nonzero_value_index] + row_sums <- rowSums(t(i)) + jaccard <- nonzero_value/(row_sums[nonzero_value_index[,1]] + +row_sums[nonzero_value_index[,1]] - nonzero_value) + object@jaccard[[n]] = sparseMatrix(i = nonzero_value_index[,1], + j = nonzero_value_index[,2], + x = jaccard, dims = dim(common_values)) + for(j in 1:nrow(object@jaccard[[n]])){ + object@jaccard[[n]][j,j] = 0 + } + object@jaccard[[n]][is.na(object@jaccard[[n]])] <- 0 + n = n + 1 + } + return(object) +} diff --git a/man/calculateWilcoxon.Rd b/man/calculateWilcoxon.Rd new file mode 100644 index 00000000..65e71a5d --- /dev/null +++ b/man/calculateWilcoxon.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{calculateWilcoxon} +\alias{calculateWilcoxon} +\title{Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test.} +\usage{ +calculateWilcoxon(object, compare.datasets = TRUE, + compare.clusters = TRUE) +} +\arguments{ +\item{object}{\code{liger} object. Should run quantileAlignSNF first} + +\item{compare.datasets}{differential expression analysis for different datasets within in each cluster +(default TRUE)} + +\item{compare.clusters}{differential expression analysis for different clusters (default TRUE).} +} +\value{ +Updated \code{liger} object. +} +\description{ +Analyze differential gene expression using presto's implementation of the Wilcoxon rank sum test. +} +\examples{ +\dontrun{ +# liger object, clustering complete +ligerex +# fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +ligerex <- calculateWilcoxon(ligerex) +} + +} diff --git a/man/liger.Rd b/man/liger.Rd index 3f2e30ca..183d635b 100644 --- a/man/liger.Rd +++ b/man/liger.Rd @@ -38,6 +38,8 @@ matrix)} \item{\code{V}}{Dataset-specific gene loading factors (one matrix per dataset, dimensions k by genes)} +\item{\code{jaccard}}{Holds Jaccard matrices for use in Louvain-Jaccard clustering and imputation} + \item{\code{tsne.coords}}{Matrix of 2D coordinates obtained from running t-SNE on H.norm or H matrices} \item{\code{alignment.clusters}}{Initial joint cluster assignments from shared factor alignment} @@ -49,6 +51,10 @@ alignment (out.summary contains edge weight information between cell combination \item{\code{agg.data}}{Data aggregated within clusters} +\item{\code{wilcoxon.compare.datasets}}{Data frame of Wilcoxon test output for comparing datasets} + +\item{\code{wilcoxon.compare.clusters}}{Data frame of Wilcoxon test output for comparing clusters} + \item{\code{parameters}}{List of parameters used throughout analysis} \item{\code{version}}{Version of package used to create object} diff --git a/man/plotWilcoxon.Rd b/man/plotWilcoxon.Rd new file mode 100644 index 00000000..d29b7c0f --- /dev/null +++ b/man/plotWilcoxon.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{plotWilcoxon} +\alias{plotWilcoxon} +\title{Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster.} +\usage{ +plotWilcoxon(object, num_top = 10) +} +\arguments{ +\item{object}{\code{liger} object. After \code{calculateWilcoxon} is completed.} + +\item{num_top}{number of top expressed genes to be displayed (default=10)} +} +\value{ +a heatmap comparing the average expression values of selected top genes. +} +\description{ +Generate heatmap for visulization of Wilconxon rank sum test results. Display top expressed genes for each cluster. +} +\examples{ +\dontrun{ +# liger object, Wilcoxon rank sum test complete +ligerex +# fill wilcoxon slot with dataframe of information from presto's Wilcoxon rank sum test +p <- plotWilcoxon(ligerex) +} + +} diff --git a/man/runJaccard.Rd b/man/runJaccard.Rd new file mode 100644 index 00000000..4d58c30b --- /dev/null +++ b/man/runJaccard.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{runJaccard} +\alias{runJaccard} +\title{Make Jaccard matrix} +\usage{ +runJaccard(object) + +runJaccard(object) +} +\arguments{ +\item{object}{\code{liger} object. Should run scaleNotCenter first} + +\item{object}{\code{liger} object. Should run scaleNotCenter first} +} +\value{ +Updated \code{liger} object. + +Updated \code{liger} object. +} +\description{ +Make Jaccard matrix + +Make Jaccard matrix (will have use when imputation is implemented) +} +\examples{ +\dontrun{ +# liger object, after running scaleNotCenter +ligerex +# add Jaccard matrix to a list for each dataset +ligerex <- runJaccard(ligerex) +} +\dontrun{ +# liger object, after running scaleNotCenter +ligerex +# add Jaccard matrix to a list for each dataset +ligerex <- runJaccard(ligerex) +} +} diff --git a/man/subsetWilcoxon.Rd b/man/subsetWilcoxon.Rd new file mode 100644 index 00000000..829d19c3 --- /dev/null +++ b/man/subsetWilcoxon.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liger.R +\name{subsetWilcoxon} +\alias{subsetWilcoxon} +\title{Subset a dataframe produced by calculateWilcox by feature, dataset, or column} +\usage{ +subsetWilcoxon(object, features = NULL, datasets = NULL, + columns = NULL, return.subset = FALSE, save.subset = TRUE) +} +\arguments{ +\item{object}{\code{liger} object. Should run calculateWilcox first} + +\item{features}{A list of features to include in the subset (default NULL)} + +\item{datasets}{A list of datasets to include in the subset (defualt NULL).} + +\item{columns}{A list of columns to include in the subset (default NULL).} + +\item{return.subset}{Return the subsetted dataframe instead of a liger object (default FALSE)} + +\item{save.subset}{Replace the unsubset dataframe with the subset (default TRUE)} +} +\value{ +Updated \code{liger} object. +} +\description{ +Subset a dataframe produced by calculateWilcox by feature, dataset, or column +} +\examples{ +\dontrun{ +# liger object, after running CalculateWilcoxon +ligerex +# create lists of features, datasets, and columns to subset by +genes = c("UBBP4","KIAA1715","TMEM50B") +datasets = c("tenx") +# +ligerex <- subsetWilcoxon(ligerex, features = genes, datasets = datasets) +} +}