diff --git a/.logs/sagenet/ST_human_heart b/.logs/sagenet/ST_human_heart index ea9c733..a926a3f 100644 --- a/.logs/sagenet/ST_human_heart +++ b/.logs/sagenet/ST_human_heart @@ -1626,3 +1626,47 @@ Resource usage summary: The output (if any) is above this job summary. +Namespace(i='data_tidy', oo='output', tag='ST_human_heart', tag_query='query', tag_ref='ST_all') +cuda:0 +1 +2 +3 +4 +5 + +------------------------------------------------------------ +Sender: LSF System +Subject: Job 2251715: in cluster Done + +Job was submitted from host by user in cluster at Sun Feb 27 18:43:12 2022 +Job was executed on host(s) , in queue , as user in cluster at Sun Feb 27 18:46:14 2022 + was used as the home directory. + was used as the working directory. +Started at Sun Feb 27 18:46:14 2022 +Terminated at Sun Feb 27 19:14:39 2022 +Results reported at Sun Feb 27 19:14:39 2022 + +Your job looked like: + +------------------------------------------------------------ +# LSBATCH: User input +python3 code/experiments/run_sagenet.py -i data_tidy --tag ST_human_heart --tag_ref ST_all --tag_query query --oo output +------------------------------------------------------------ + +Successfully completed. + +Resource usage summary: + + CPU time : 60493.49 sec. + Max Memory : 3450 MB + Average Memory : 3112.68 MB + Total Requested Memory : 3000.00 MB + Delta Memory : -450.00 MB + Max Swap : - + Max Processes : 4 + Max Threads : 131 + Run time : 1708 sec. + Turnaround time : 1887 sec. + +The output (if any) is above this job summary. + diff --git a/Rplots.pdf b/Rplots.pdf index 9236d73..75be9f4 100644 Binary files a/Rplots.pdf and b/Rplots.pdf differ diff --git a/analysis/DHH_data_all.rmd b/analysis/DHH_data_all.rmd index c2a1696..d3f6704 100644 --- a/analysis/DHH_data_all.rmd +++ b/analysis/DHH_data_all.rmd @@ -209,7 +209,7 @@ meta_dt %<>% meta_dt$ent = 1 - meta_dt$ent/(max(meta_dt$ent)) sagenet_p = plot_2d( - dim_df = map_2d[['sagenet']][meta_dt$,], + dim_df = map_2d[['sagenet']][meta_dt$cell_id,], labels = meta_dt$celltype, label_cols = cell_type_cols, hide_legend = FALSE, diff --git a/analysis/DHH_data_query.rmd b/analysis/DHH_data_query.rmd new file mode 100644 index 0000000..fd821e4 --- /dev/null +++ b/analysis/DHH_data_query.rmd @@ -0,0 +1,368 @@ +--- +title: "Figure 4: Developing Human Heart - scRNAseq schema" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/DHH_data_query.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +# library('pika') +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +# library('anndata') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'ST_human_heart' +ref_tag = 'ST_all' +data_dir = file.path('data_tidy', tag) +out_dir = file.path('output', tag) +obj_dir = 'int_data/ST_human_heart/query' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/ST_human_heart/query' +dir.create(fig_dir) +fig_list = list() +data_f = list.files(data_dir) +exp_names = 'query' +names(exp_names) = exp_names +sce_f = exp_names %>% + map(~file.path(data_dir, paste0(.x, '.h5ad'))) +methods = 'sagenet' +names(methods) = methods +preds_f = methods %>% + map(function(y) exp_names %>% + map(~file.path(out_dir, y, paste0(paste('preds', 'ST_all', .x, sep='_'), '.h5ad'))) + ) + +col_dt_f = file.path('int_data/ST_human_heart', 'col_dt_ST.txt') +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +overwrite = FALSE +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +sce_ST = readH5AD('output/ST_human_heart/sagenet/preds_ST_all_ST.h5ad') +s_sce = readH5AD('output/ST_human_heart/sagenet/preds_ST_all_query.h5ad') +meta_dt = colData(s_sce) %>% as.data.table %>% + .[, class__ := gsub(' $', '', class__)] +meta_dt[dataset=='ST']$class__ = class__code[sce_ST$class__] +rm(sce_ST) +colData(s_sce) = DataFrame(meta_dt) +colnames(s_sce) = meta_dt$cell_id +rm(sce_list) +meta_dt$ent = get_confidence(s_sce) / 3 +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + # Gene importanc + + preds_list = list() + preds_list[['sagenet']] = anndata::read_h5ad('output/ST_human_heart/sagenet/preds_ST_all_query.h5ad')$obsm$dist_map + + +gc() +} + +col_dt = col_dt_f %>% fread +# type_cols = .palette2[1:length(unique(meta_dt$class__))] +# names(type_cols) = as.factor(as.character(unique(meta_dt$celltype))) +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + + # sagenet distances + # d[['sagenet']] = preds_list[['sagenet']] %>% map(as.dist) + d_mtx[['sagenet']] = preds_list[['sagenet']] + + rownames(d_mtx[['sagenet']]) = colnames(d_mtx[['sagenet']]) = colnames(s_sce) + map_2d[['sagenet']] = Rtsne(d_mtx[['sagenet']], is_distance=T)$Y + rownames(map_2d[['sagenet']]) = colnames(s_sce) + meta_dt[, `:=`(sagenet_all_1 = ..map_2d[['sagenet']][, 1], + sagenet_all_2 = ..map_2d[['sagenet']][, 2])] + + + expr_pcs = prcomp(assays(s_sce)[[1]])$rotation[,1:10] + d[['exprs']] = dist(expr_pcs) %>% as.dist + d_mtx[['exprs']] = d[['exprs']] %>% as.matrix + rownames(d_mtx[['exprs']]) = colnames(d_mtx[['exprs']]) = colnames(s_sce) + # map_2d[['exprs']] = uwot::umap(d[['exprs']], n_neighbors=50) + map_2d[['exprs']] = Rtsne(d_mtx[['exprs']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['exprs']]) = colnames(s_sce) + meta_dt[, `:=`(expr_tsne_1 = ..map_2d[['exprs']][, 1], + expr_tsne_2 = ..map_2d[['exprs']][, 2])] + + + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_dt + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_query = readRDS(file.path(obj_dir, 'meta_dt.Rds')) +} +``` + + + +## Cell2Cell afinity scores +```{r calc_c2c, fig.height=15, fig.width=15, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_map.Rds')) +){ + + g_out = map_2d[['sagenet']] %>% + get_delaunay(plot=FALSE) %>% + .$ + + m_dt = copy(meta_dt)[, class__:=paste(class__, dataset, sep=' - ')] + colData(s_sce) = DataFrame(m_dt) + colnames(s_sce) = m_dt$cell_id + + ccc_map = cellCellContact( + sce = s_sce, + group = 'class__', + graph = g_out, + nperm = 1000, + plot = FALSE, + cellID ='cell_id') + + obj_list[['ccc_map']] = ccc_map +}else{ + ccc_map = readRDS(file.path(obj_dir, 'ccc_map.Rds')) +} +pmat = ccc_map$pmat +# rownames(pmat) = colnames(pmat) = gsub(' $', '', colnames(pmat)) +dataset_class_ord = c(paste(names(cell_type_cols), 'scRNAseq', sep=' - '), paste(names(class__cols), 'ST', sep=' - ')) +dataset_class_cols = c(cell_type_cols, class__cols) +names(dataset_class_cols) = dataset_class_ord +ccc_plot = cellCellContactHeatmapTriangle(pmat, dataset_class_cols, title='') + +ccc_plot + + +``` + + + +## B: UMAP plosts of expression space and the reconstructed space +```{r B, fig.height=12, fig.width=22, message=FALSE, cache=FALSE, eval=TRUE} +# meta_dt %<>% .[, .SD[sample(.N)]] +meta_dt %<>% + .[, class__ := factor(class__, ordered=TRUE, levels=c(class__ord, cell_type_ord))] %>% + .[order(class__)] +meta_dt$ent = 1 - meta_dt$ent/(max(meta_dt$ent)) + +sagenet_ST_p = plot_2d( + dim_df = map_2d[['sagenet']][meta_dt[dataset=='ST']$cell_id,], + labels = meta_dt[dataset=='ST']$class__, + label_cols = c(cell_type_cols, class__cols), + hide_legend = FALSE, + title = 'Spatial Transcriptomics', + sz = 2.5, + alpha = meta_dt[dataset=='ST']$ent, + label_title = 'region', + shape=17 +) + labs(x = '', y = 'SageNet') + + xlim(c(min(map_2d[['sagenet']][,1]), max(map_2d[['sagenet']][,1]))) + + ylim(c(min(map_2d[['sagenet']][,2]), max(map_2d[['sagenet']][,2]))) + + +sagenet_scRNAseq_p = plot_2d( + dim_df = map_2d[['sagenet']][meta_dt[dataset=='scRNAseq']$cell_id,], + labels = meta_dt[dataset=='scRNAseq']$class__, + label_cols = c(cell_type_cols, class__cols), + hide_legend = FALSE, + title = 'scRNAseq', + sz = 2.5, + alpha = meta_dt[dataset=='scRNAseq']$ent, + label_title = 'cell type', + shape=16 +) + labs(x = '', y = '') + + xlim(c(min(map_2d[['sagenet']][,1]), max(map_2d[['sagenet']][,1]))) + + ylim(c(min(map_2d[['sagenet']][,2]), max(map_2d[['sagenet']][,2]))) + +m_dt = data.table(cell_id = meta_dt$cell_id, map_2d[['sagenet']][meta_dt$cell_id,], class__ = meta_dt$class__, dataset=meta_dt$dataset) + +cell_type_mean = unique(m_dt[, .(mean_x = median(V1), mean_y=median(V2), dataset), by='class__']) + +sagenet_mean_p = plot_2d( + dim_df = cell_type_mean[, .(mean_x, mean_y)], + labels = cell_type_mean$class__, + label_cols = c(cell_type_cols, class__cols), + hide_legend = TRUE, + title = 'medians', + sz = 4, + # alpha = meta_dt[dataset=='scRNAseq']$ent, + label_title = 'cell type', + shape=ifelse(cell_type_mean$dataset=='ST', 17, 16) +) + labs(x = '', y = '') + + xlim(c(min(map_2d[['sagenet']][,1]), max(map_2d[['sagenet']][,1]))) + + ylim(c(min(map_2d[['sagenet']][,2]), max(map_2d[['sagenet']][,2]))) + + +# sagenet_ST_p + sagenet_scRNAseq_p + sagenet_mean_p + plot_layout(guides = 'collect') +# fig_list[['B']] = sagenet_p +# fig_list[['B']] + + +exprs_ST_p = plot_2d( + dim_df = map_2d[['exprs']][meta_dt[dataset=='ST']$cell_id,], + labels = meta_dt[dataset=='ST']$class__, + label_cols = c(cell_type_cols, class__cols), + hide_legend = TRUE, + title = '', + sz = 2.5, + # alpha = meta_dt[dataset=='ST']$ent, + label_title = 'region', + shape=17 +) + labs(x = '', y = 'Expression') + + xlim(c(min(map_2d[['exprs']][,1]), max(map_2d[['exprs']][,1]))) + + ylim(c(min(map_2d[['exprs']][,2]), max(map_2d[['exprs']][,2]))) + +exprs_scRNAseq_p = plot_2d( + dim_df = map_2d[['exprs']][meta_dt[dataset=='scRNAseq']$cell_id,], + labels = meta_dt[dataset=='scRNAseq']$class__, + label_cols = c(cell_type_cols, class__cols), + hide_legend = TRUE, + title = '', + sz = 2.5, + # alpha = meta_dt[dataset=='scRNAseq']$ent, + label_title = 'cell type', + shape=16 +) + labs(x = '', y = '') + + xlim(c(min(map_2d[['exprs']][,1]), max(map_2d[['exprs']][,1]))) + + ylim(c(min(map_2d[['exprs']][,2]), max(map_2d[['exprs']][,2]))) + +m_dt = data.table(cell_id = meta_dt$cell_id, map_2d[['exprs']][meta_dt$cell_id,], class__ = meta_dt$class__, dataset=meta_dt$dataset) + +cell_type_mean = unique(m_dt[, .(mean_x = median(V1), mean_y=median(V2), dataset), by='class__']) + +exprs_mean_p = plot_2d( + dim_df = cell_type_mean[, .(mean_x, mean_y)], + labels = cell_type_mean$class__, + label_cols = c(cell_type_cols, class__cols), + hide_legend = TRUE, + title = '', + sz = 4, + # alpha = meta_dt[dataset=='scRNAseq']$ent, + label_title = 'cell type', + shape=ifelse(cell_type_mean$dataset=='ST', 17, 16) +) + labs(x = '', y = '') + + xlim(c(min(map_2d[['exprs']][,1]), max(map_2d[['exprs']][,1]))) + + ylim(c(min(map_2d[['exprs']][,2]), max(map_2d[['exprs']][,2]))) + + +sagenet_ST_p + sagenet_scRNAseq_p + sagenet_mean_p + exprs_ST_p + exprs_scRNAseq_p + exprs_mean_p + plot_layout(ncol=3, guides = 'collect') +# fig_list[['B']] = sagenet_p +# fig_list[['B']] + + +# meta_dt %>% +# ggplot + +# aes(x = factor(class__, ordered=TRUE, levels= names(type_cols)), y = ent, fill=class__) + +# geom_violin() + +# # + geom_jitter(height = 0, width = 0.1) + +# theme_bw() + +# scale_fill_manual(values=type_cols, na.value='gray') + +# labs(y='confidence score', x= '') + +# theme(legend.position='none') + +# theme(axis.text.x=element_text(angle = 45, hjust = 1)) + +``` + +## XD: +```{r XD, fig.height=6, fig.width=5, message=FALSE, cache=FALSE} +# tab_dt= copy(meta_dt[, .(ent=median(ent), .N), by = 'class__']) %>% .[order(ent), ] +# type_ord = unique(tab_dt$class__) +# data_summary <- function(x) { +# m <- mean(x) +# ymin <- m-sd(x) +# ymax <- m+sd(x) +# return(c(y=m,ymin=ymin,ymax=ymax)) +# } + +# meta_dt %>% +# ggplot + +# aes(x = factor(class__, ordered=TRUE, levels= type_ord), y = ent, color=class__) + +# theme_minimal() + +# scale_color_manual(values=cell_type_cols, na.value='#e6e6e6', drop=TRUE) + +# labs(y='confidence score', x= '') + +# stat_summary(fun.data=data_summary, size=1.5) + +# coord_flip() + +# theme(legend.position='none') +``` + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/Fig2&Supp1-8.rmd b/analysis/Fig2&Supp1-8.rmd new file mode 100644 index 0000000..cbf2607 --- /dev/null +++ b/analysis/Fig2&Supp1-8.rmd @@ -0,0 +1,893 @@ +--- +title: "Spatial Mouse Atlas - Seqfish - Mouse Embryo, Figure 2, Supp. Fig. 1-8" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/Fig2&Supp1-8.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'seqfish_mouse_embryo' +ref_tag = 'embryo1_2' +out_dir = file.path('output', tag) +obj_dir = 'int_data/seqfish_mouse_embryo/embryo1_2' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/seqfish_mouse_embryo/embryo1_2' +dir.create(fig_dir) +fig_list = list() + +methods = list.files(out_dir) +names(methods) = methods +preds_f = methods %>% + map(~file.path(out_dir, .x, paste0(paste('preds', ref_tag, 'query_data', sep='_'), '.h5ad'))) +genes_f = file.path(obj_dir, 'gene_dt_embryo1_2.txt') +imp_f = file.path(obj_dir, 'imp_embryo1_2.txt') +col_dt_f = file.path(obj_dir, 'col_dt_embryo1_2.txt') +adj_f = 'models/seqfish_mouse_embryo/embryo1_2/embryo1_2_leiden_0.1.h5ad' +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +n_markers = 3 +overwrite = FALSE +n_PCs = 30 +n_PCs_markers = 10 +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +sce = preds_f[['sagenet']] %>% readH5AD +cell_idx = which(sce$embryo != 'scRNAseq') +sce %<>% .[, cell_idx] +meta_dt = colData(sce) %>% as.data.table +gene_names = rownames(sce) +adj = assays(readH5AD(adj_f))[[1]] +rownames(adj) = colnames(adj) = gene_names +adj = adj[rowSums(adj) != 0, rowSums(adj) != 0] + + +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + preds_list = list() + preds_list[['sagenet']] = anndata::read_h5ad(preds_f[['sagenet']])$obsm$dist_map + adj = anndata::read_h5ad(preds_f[['sagenet']])$varm$adj + preds_list[['sagenet']] %<>% as('sparseMatrix') %>% .[cell_idx, cell_idx] + ## Novosparc predictions + preds_list[['novosparc']] = preds_f[['novosparc']] %>% read_preds_novosparc %>% .[cell_idx, ] + gc() + # Tangram predictions + preds_list[['tangram_cells']] = preds_f[['tangram_cells']] %>% read_preds_tangram %>% .[cell_idx, ] + gc() + + +gc() +} + +imp = imp_f %>% fread %>% setnames('V1', 'GENE') +genes = imp$GENE +imp %<>% .[,-1] %>% as.matrix +rownames(imp) = genes +imp_abs = abs(imp) +# imp = apply(imp, 2, function(x) (x)/(max(x))) +imp_dt = apply(imp, 2, function(x) rownames(imp)[order(-x)[1:n_markers]]) %>% + as.data.table +markers = imp_dt %>% unlist %>% setdiff(c()) +print(markers) +col_dt = col_dt_f %>% fread +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + map_2d[['tangram_cells']] = preds_list[['tangram_cells']] + map_2d[['True Space']] = as.matrix(meta_dt[, .(x,y)]) + rownames(map_2d[['True Space']]) = meta_dt$cell_id + map_2d[['novosparc']] = preds_list[['novosparc']] + + # sagenet distances + d[['sagenet']] = preds_list[['sagenet']] %>% as.dist + d_mtx[['sagenet']] = d[['sagenet']] %>% as.matrix + rownames(d_mtx[['sagenet']]) = colnames(d_mtx[['sagenet']]) = colnames(sce) + map_2d[['sagenet']] = Rtsne(d_mtx[['sagenet']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['sagenet']]) = colnames(sce) + meta_dt[, `:=`(sagenet_1 = ..map_2d[['sagenet']][cell_id, 1], + sagenet_2 = ..map_2d[['sagenet']][cell_id, 2])] + + # distances based on expression values + expr_pcs = prcomp(assays(sce)[[1]])$rotation[,1:10] + d[['exprs']] = dist(expr_pcs) %>% as.dist + d_mtx[['exprs']] = d[['exprs']] %>% as.matrix + rownames(d_mtx[['exprs']]) = colnames(d_mtx[['exprs']]) = colnames(sce) + # map_2d[['exprs']] = uwot::umap(d[['exprs']], n_neighbors=50) + map_2d[['exprs']] = Rtsne(d_mtx[['exprs']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['exprs']]) = colnames(sce) + meta_dt[, `:=`(expr_tsne_1 = ..map_2d[['exprs']][cell_id, 1], + expr_tsne_2 = ..map_2d[['exprs']][cell_id, 2])] + + # distances in the marker-subset space + markers_pcs = prcomp(assays(sce[markers,])[[1]])$rotation[,1:5] + d[['sagenet_markers']] = dist(markers_pcs) %>% as.dist + d_mtx[['sagenet_markers']] = d[['sagenet_markers']] %>% as.matrix + rownames(d_mtx[['sagenet_markers']]) = colnames(d_mtx[['sagenet_markers']]) = colnames(sce) + # map_2d[['sagenet_markers']] = uwot::umap(d[['sagenet_markers']], n_neighbors=50) + map_2d[['sagenet_markers']] = Rtsne(d_mtx[['sagenet_markers']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['sagenet_markers']]) = colnames(sce) + meta_dt[, `:=`(markers_tsne_1 = ..map_2d[['sagenet_markers']][cell_id, 1], + markers_tsne_2 = ..map_2d[['sagenet_markers']][cell_id, 2])] + + # tangram + d[['tangram_cells']] = dist(preds_list[['tangram_cells']]) + d_mtx[['tangram_cells']] = d[['tangram_cells']] %>% as.matrix + rownames(d_mtx[['tangram_cells']]) = colnames(d_mtx[['tangram_cells']]) = meta_dt$cell_id + rownames(map_2d[['tangram_cells']]) = colnames(sce) + meta_dt[, `:=`(tangram_1 = ..map_2d[['tangram_cells']][cell_id, 1], + tangram_2 = ..map_2d[['tangram_cells']][cell_id, 2])] + + d[['novosparc']] = dist(preds_list[['novosparc']]) + d_mtx[['novosparc']] = d[['novosparc']] %>% as.matrix + rownames(d_mtx[['novosparc']]) = colnames(d_mtx[['novosparc']]) = meta_dt$cell_id + rownames(map_2d[['novosparc']]) = colnames(sce) + meta_dt[, `:=`(novosparc_1 = ..map_2d[['novosparc']][cell_id, 1], + novosparc_2 = ..map_2d[['novosparc']][cell_id, 2])] + + # true physical distances + d_true = dist(meta_dt[, .(x,y)]) + d_mtx_true = d_true %>% as.matrix + rownames(d_mtx_true) = colnames(d_mtx_true) = meta_dt$cell_id + + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_dt + obj_list[['d']] = d + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_dt = readRDS(file.path(obj_dir, 'meta_dt.Rds')) + # d = readRDS(file.path(obj_dir, 'd.Rds')) +} +``` + +## Cell-Cell afinity scores +```{r calc_c2c, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_list.Rds')) +){ + ccc_list = list() + # True physical space + for(e in levels(factor(meta_dt$embryo))){ + cells = meta_dt[embryo == e]$cell_id + g_out = map_2d[['True Space']] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[['True Space']][[e]] = cellCellContact( + sce = sce[, cells], + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID='cell_id') + } + + # Predicted spaces + + for(meth in setdiff(names(map_2d), 'True Space')){ + # for(meth in 'sagenet_markers'){ + ccc_list[[meth]] = list() + for(e in levels(factor(meta_dt$embryo))){ + cells = meta_dt[embryo == e]$cell_id + g_out = map_2d[[meth]] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[[meth]][[e]] = cellCellContact( + sce = sce[, cells], + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID ='cell_id') + } + } + + obj_list[['ccc_list']] = ccc_list +}else{ + ccc_list = readRDS(file.path(obj_dir, 'ccc_list.Rds')) +} +ccc_dist = list() +for(meth in setdiff(method_ord, 'True Space')){ + ccc_dist[[meth]] = c() + for(e in levels(factor(meta_dt$embryo))){ + m1 = ccc_list[[meth]][[e]]$pmat + m2 = ccc_list[['True Space']][[e]]$pmat + ccc_dist[[meth]][e] = norm(m1 - m2 , '2') + } +} + +ccc_plot_list = names(ccc_list) %>% + map(~ccc_list[[.x]][[5]]$pmat %>% cellCellContactHeatmapTriangle(celltype_colours, title=method_code[.x])) + +# Scores' data table +ccc_dt = names(ccc_dist) %>% + map(~names(ccc_dist[[.x]]) %>% map(function(y) + data.table(method=.x, embryo=y, corr=ccc_dist[[.x]][y]) + ) %>% purrr::reduce(rbind) + ) %>% + purrr::reduce(rbind) + + +``` + + +## Correlation between the true distance matrix and the prediction distance matrix +```{r calc_cor, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'cor_list.Rds')) +){ + cor_list = list() + for(meth in setdiff(names(map_2d), 'True Space')){ + + # for(meth in 'sagenet_markers'){ + cor_list[[meth]] = c() + for(e in levels(factor(meta_dt$embryo))){ + cells = meta_dt[embryo == e]$cell_id + cor_list[[meth]][e] = cor(c(d_mtx_true[cells, cells]), c(d_mtx[[meth]][cells, cells]), method='spearman') + } + } + + obj_list[['cor_list']] = cor_list + gc() +}else{ + cor_list = readRDS(file.path(obj_dir, 'cor_list.Rds')) +} + +cor_dt = names(cor_list) %>% + map(~data.table(method=.x, embryo=names(cor_list[[.x]]), corr=cor_list[[.x]])) %>% + purrr::reduce(rbind) +``` + +## Distribution of the lisi scores +```{r calc_lisi, message=FALSE, cache=FALSE} +preds_list = list() +if(overwrite | + !file.exists(file.path(obj_dir, 'lisi_vals.Rds')) +){ + preds_list[['sagenet']] = map_2d[['sagenet']] + preds_list[['exprs']] = map_2d[['exprs']] + preds_list[['sagenet_markers']] = map_2d[['sagenet_markers']] + preds_list[['True Space']] = meta_dt[, .(x,y)] + preds_list[['novosparc']] = map_2d[['novosparc']] + preds_list[['tangram_cells']] = map_2d[['tangram_cells']] + # lisi_list = list() + lisi_vals = names(preds_list) %>% + map(~compute_lisi(preds_list[[.x]], meta_dt, c("class_", "cell_type")) %>% + as.data.table %>% + .[, `:=`(cell_id = ..meta_dt$cell_id, method = .x)]) %>% + purrr::reduce(rbind) + obj_list[['lisi_vals']] = lisi_vals +}else{ + lisi_vals = readRDS(file.path(obj_dir, 'lisi_vals.Rds')) +} +lisi_vals = dcast(lisi_vals, cell_id ~ method, value.var='cell_type' ) %>% + .[, (setdiff(names(.), 'cell_id')) := lapply(.SD, function(x) x/.SD$`True Space`), .SDcols = setdiff(names(.), 'cell_id')] %>% + melt(variable.name='method', value.name='LISI') %>% + .[method != 'True Space'] +methods = setdiff(unique(lisi_vals$method), 'True Space') +names(methods) = methods +lisi_vals %<>% setkey(cell_id) +lisi_vals = lisi_vals[order(cell_id)] %>% + .[meta_dt[, .(cell_id, cell_type, embryo)]] +``` + + + +## 2A: E1L1 (ref): cell types + partitionings +```{r 2A, fig.height=3, fig.width=10, message=FALSE, cache=FALSE} +p = plot_spatial(col_dt[, .(x, -y)], labels=factor(as.character(col_dt$cell_type), ordered=TRUE), label_cols=celltype_colours, hide_legend = TRUE, title = 'cell types') +coord_fixed() +p1 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.01), hide_legend = TRUE, title='partitioning 1')+coord_fixed() +p2 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.05), hide_legend = TRUE, title='partitioning 2')+coord_fixed() +p3 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.1), hide_legend = TRUE, title='partitioning 3')+coord_fixed() +p4 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.5), hide_legend = TRUE, title='partitioning 4')+coord_fixed() +p5 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_1), hide_legend = TRUE, title='partitioning 5')+coord_fixed() + +fig_list[['A']] = p + p1 + p2 + p3 + p4 + p5 + plot_layout(nrow=1) +fig_list[['A']] +# plist +``` + +## 2B-D: Benchmark plots; dist. correlation, lisi scores, and cell-cell affinity dist. +```{r 2B_D, fig.height=8, fig.width=30, message=FALSE, cache=FALSE} +fig_list[['2B']] = cor_dt[method != 'sagenet_markers'] %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space', 'sagenet_markers')), + labels=method_code[!names(method_code) %in% c('True Space', 'sagenet_markers')]) + + labs(color='method', y = "Spearman's rank correlation", x = 'query dataset') + + theme_bw() + + theme(text = element_text(size = 30)) + + theme(axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) +fig_list[['2B']] + +fig_list[['2C']] = ccc_dt[method != 'True Space' & method != 'sagenet_markers'] %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + # geom_bar(stat="identity", position=position_dodge()) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space', 'sagenet_markers')), + labels=method_code[!names(method_code) %in% c('True Space', 'sagenet_markers')]) + + labs(color='method', y = "cell-type afinity distance", x = 'query dataset') + + theme_bw() + + theme(legend.position='none', + text = element_text(size = 30), + axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) + + +fig_list[['2D']] = lisi_vals[method != 'sagenet_markers'] %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=unique(lisi_vals[method != 'sagenet_markers']$method)) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x= 'method') + + scale_color_manual(values=method_cols, na.value='#e6e6e6') + + theme(text = element_text(size = 30), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=method_code) + +fig_list[['2B-D']] = + fig_list[['2B']] + + plot_spacer() + + fig_list[['2C']] + + plot_spacer() + + fig_list[['2D']] + + plot_layout(width=c(9, 1, 9, 1, 8), ncol = 5, guides='collect') +fig_list[['2B-D']] +``` + + +## Supp2A-C: Benchmark plots; dist. correlation, lisi scores, and cell-cell affinity dist. +```{r Supp2A_C, fig.height=7, fig.width=31, message=FALSE, cache=FALSE} +fig_list[['Supp2A']] = cor_dt %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "Spearman's rank correlation", x = 'query dataset') + + theme_bw() + + theme(text = element_text(size = 30)) + + theme(axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) +fig_list[['Supp2A']] + +fig_list[['Supp2B']] = ccc_dt[method != 'True Space'] %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + # geom_bar(stat="identity", position=position_dodge()) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "cell-type afinity distance", x = 'query dataset') + + theme_bw() + + theme(legend.position='none', + text = element_text(size = 30), + axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) + + +fig_list[['Supp2C']] = lisi_vals %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=unique(lisi_vals$method)) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x= 'method') + + scale_color_manual(values=method_cols, na.value='#e6e6e6') + + theme(text = element_text(size = 30), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=method_code) + +fig_list[['Supp2']] = + fig_list[['Supp2A']] + + plot_spacer() + + fig_list[['Supp2B']] + + plot_spacer() + + fig_list[['Supp2C']] + + plot_layout(width=c(9, 1, 9, 1, 9), ncol = 5, guides='collect') +fig_list[['Supp2']] +``` + +## 2E&Supp2D: UMAP plosts of expression space and the reconstructed space +```{r 2E_Supp2D, fig.height=10, fig.width=15, message=FALSE, cache=FALSE, eval=TRUE} +meta_dt %<>% .[, .SD[sample(.N)]] +meta_dt[, ent := (ent_embryo1_2_leiden_1 + ent_embryo1_2_leiden_0.1 + ent_embryo1_2_leiden_0.01 + ent_embryo1_2_leiden_0.5 + ent_embryo1_2_leiden_0.05)/5 ] +meta_dt$ent = meta_dt$ent_plot = (1 - meta_dt$ent/(max(meta_dt$ent)))^2 +exprs_p = plot_2d( + dim_df = meta_dt[, .(expr_tsne_1, expr_tsne_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Expression', + sz = 2.5, +) + labs(x = '', y = '') + + +markers_p = plot_2d( + dim_df = meta_dt[, .(markers_tsne_1, markers_tsne_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet-SIG', + sz = 2.5 +) + labs(x = '', y = '') + +sagenet_p = plot_2d( + dim_df = meta_dt[, .(sagenet_1, sagenet_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet', + sz = 2.5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '') + +lgened1 = (plot_2d( + dim_df = meta_dt[order(cell_type), .(sagenet_1, sagenet_2)], + labels = meta_dt[order(cell_type)]$cell_type, + label_cols = celltype_colours, + hide_legend = FALSE, + title = 'SageNet', + sz = 5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '', color = 'cell type')+ theme(text = element_text(size = 12)) ) %>% get_legend() %>% + as_ggplot() + +legend2 = ( + ggplot(meta_dt) + + aes(x = 1, y = 1, alpha=ent_plot)+ + geom_point(size=3) + + labs(alpha='confidence score') + theme(text = element_text(size = 8)) ) %>% get_legend() %>% + as_ggplot() + +novosparc_p = plot_2d( + dim_df = meta_dt[, .(novosparc_1, novosparc_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'novoSpaRc', + sz = 2.5 +) + labs(x = '', y = '') + +tangram_p = plot_2d( + dim_df = meta_dt[, .(tangram_1, tangram_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Tangram', + sz = 2.5 +) + labs(x = '', y = '') + +fig_list[['2E']] = + sagenet_p + + exprs_p + + lgened1 + + novosparc_p + + tangram_p + + legend2 + + plot_layout(ncol = 3, widths=c(5, 5, 5, 5, 5, 5)) +fig_list[['2E']] + +fig_list[['Supp2D']] = + sagenet_p + + markers_p + + exprs_p + + novosparc_p + + tangram_p + + lgened1 + + plot_layout(ncol = 3, widths=c(5, 5, 5, 5, 5, 5)) +fig_list[['Supp2D']] +``` + +## Supp1: GIN on Embryo1 layer 1 +```{r Supp1, fig.height=7.5, fig.width=7.5, message=FALSE, cache=FALSE} +g_obj = graph_from_adjacency_matrix(adj, mode='undirected') +d = cluster_louvain(g_obj) +grp = .palette2[membership(d)] +lay = layout_nicely(g_obj) +graph_col_comm(graph=g_obj, lay=lay, grp=grp, title='Gene interaction network on embryo 1 layer 1 (E1_L1)', labels=rownames(adj)) +graph_col_comm(graph=g_obj, lay=lay, grp=ifelse(rownames(adj) %in% markers, grp, '#e6e6e6'), title='Markers', labels=ifelse(rownames(adj) %in% markers, rownames(adj), '')) +``` + +## Supp3: Cell-type contact maps +```{r Supp3, fig.height=15, fig.width=15, message=FALSE, cache=FALSE} +fig_list[['Supp3']] = ccc_plot_list +fig_list[['Supp3']] +``` + + +## Supp4: LISI scores by cell type and embryo +```{r Supp4, fig.height=30, fig.width=50, message=FALSE, cache=FALSE} +fig_list[['Supp4A']] = lisi_vals %>% + ggplot + + aes(x = factor(embryo, ordered=TRUE, levels=embryo_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x = 'query dataset') + + theme(text = element_text(size = 50), + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=embryo_code) + +fig_list[['Supp4B']] = lisi_vals %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE), y = log2(LISI), color=factor(cell_type, ordered=TRUE), fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x = 'cell type') + + theme(text = element_text(size = 50), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + +fig_list[['Supp4']] = + fig_list[['Supp4A']] + + plot_spacer() + + fig_list[['Supp4B']] + + plot_layout(ncol=1, heights=c(14, 2, 14), guides='collect') +fig_list[['Supp4']] +``` + + +## Supp5: Spatially informative genes (SIGs) +```{r Supp5, fig.height=4, fig.width=6, message=FALSE, cache=FALSE, eval=TRUE} + +for(m in markers[1:2]){ + + p1 = plot_2d_cont( + dim_df = meta_dt[embryo == 'embryo1_2', .(x, -y)], + labels = assays(sce)[[1]][m, meta_dt[embryo == 'embryo1_2']$cell_id], + # label_cols = celltype_colours, + hide_legend = FALSE, + title = '', + sz = 1.5 + # alpha = meta_dt$ent, + ) + labs(x = '', y = '', color='') + + coord_fixed() + + theme_void() + + theme(legend.position='none') + + p2 = plot_2d_cont( + dim_df = meta_dt[, .(sagenet_1, sagenet_2)], + labels = assays(sce)[[1]][m, meta_dt$cell_id], + # label_cols = celltype_colours, + hide_legend = FALSE, + title = '', + sz = 1.5 + # alpha = meta_dt$ent, + ) + labs(x = '', y = '', color='') + + theme_void() + + theme(legend.position='none') + + print(p1 + p2 + plot_annotation(title = m, theme = theme(plot.title = element_text(size = 30, hjust = 0.5, face='italic')))) + +} +``` + +## Supp6: Co-localization of gut tube with Splanchnic and Lateral plate mesoderms +```{r Supp6, fig.height=4, fig.width=13, message=FALSE, cache=FALSE, eval=TRUE} + +types = copy(meta_dt)[!(cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm')), cell_type := 'other']$cell_type +ord = order(-as.numeric(types)) +types %<>% .[ord] + +sagenet_p = plot_2d( + dim_df = meta_dt[ord, .(sagenet_1, sagenet_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet', + sz = 1.5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '') + coord_fixed() + +exprs_p = plot_2d( + dim_df = meta_dt[ord, .(expr_tsne_1, expr_tsne_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Expression', + sz = 1.5, + alpha=2 +) + labs(x = '', y = '') + + +lgened = (plot_2d( + dim_df = meta_dt[ord, .(sagenet_1, sagenet_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = FALSE, + title = 'SageNet', + sz = 5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '', color='cell type')+ theme(text = element_text(size = 15)) ) %>% get_legend() %>% + as_ggplot() + + +p = plot_spatial(col_dt[, .(x, -y)], + labels=factor(ifelse(col_dt$cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm'), col_dt$cell_type, NA)), + label_cols=celltype_colours, + hide_legend = TRUE, + title = 'Embryo 1 layer 1 (E1_L1)', + sz=1) +coord_fixed() + +fig_list[['supp6']] = + sagenet_p + + exprs_p + + p + + lgened + + plot_layout(ncol = 4, widths=c(4, 4, 2.5, 2.5)) +fig_list[['supp6']] + +``` + + + +## Supp7: Confidence scores +```{r Supp7, fig.height=12, fig.width=8, message=FALSE, cache=FALSE} +meta_dt[, ent := (ent_embryo1_2_leiden_1 + ent_embryo1_2_leiden_0.1 + ent_embryo1_2_leiden_0.01 + ent_embryo1_2_leiden_0.5 + ent_embryo1_2_leiden_0.05)/5 ] +meta_dt$ent = (1 - meta_dt$ent/(max(meta_dt$ent))) +# tab_dt = col_dt[, .N, by = 'cell_type'] %>% .[order(N), ] +tab_dt_query = copy(meta_dt[, .(ent=median(ent), .N, dataset='query'), by = 'cell_type']) %>% .[order(ent), ] +celltype_ord = unique(tab_dt_query$cell_type) +tab_dt_ref = col_dt[, .(.N, dataset='reference'), by = 'cell_type'] +tab_dt = rbind(tab_dt_ref, tab_dt_query[, .(N, dataset, cell_type)]) + +data_summary <- function(x) { + m <- median(x) + ymin <- m-sd(x) + ymax <- m+sd(x) + return(c(y=m,ymin=ymin,ymax=ymax)) +} + +p1 = meta_dt %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE, levels= celltype_ord), y = ent, color=cell_type) + + # geom_violin() + + # + geom_jitter(height = 0, width = 0.1) + + theme_bw() + + scale_color_manual(values=celltype_colours, na.value='#e6e6e6', drop=TRUE, limits=unique(tab_dt$cell_type)) + + labs(y='confidence score', x= '') + + stat_summary(fun.data=data_summary, size=1.5) + + coord_flip() + + # theme(legend.position='none') + + theme(legend.position='none', axis.ticks.y=element_blank(), axis.text.y=element_blank()) + + +p2 = tab_dt %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE, levels= celltype_ord), y = N, color=dataset) + + # geom_bar(position="dodge", stat="identity") + + geom_point(size = 3) + + theme_bw() + + # scale_fill_manual(values=celltype_colours, na.value='#e6e6e6', drop=TRUE, limits=unique(tab_dt$cell_type)) + + scale_color_manual(values=c('#FB8072', '#1e90ff'), na.value='#e6e6e6') + + labs(y='#cells', x= 'cell type', fill='cell type') + + theme(axis.text.x=element_text(angle = 90, hjust = 1)) + + coord_flip() + + + +p_list = list() +for(emb in c('embryo2_2', 'embryo2_5', 'embryo3_2', 'embryo3_5')){ + p_list[[emb]] = plot_2d( + dim_df = meta_dt[embryo==emb, .(x, -y)], + labels = meta_dt[embryo==emb]$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = embryo_code[emb], + sz = 2, + alpha = meta_dt[embryo==emb]$ent_plot, + ) + labs(x = '', y = '') + coord_fixed() +} + +fig_list[['Supp7A']] = p2 + p1 + plot_layout(ncol = 2, guides='collect') +fig_list[['Supp7A']] + +fig_list[['Supp7B']] = (p_list %>% purrr::reduce(`+`)) + + plot_layout(ncol = 2, guides='collect', heights=c(12, 12, 12, 12)) +fig_list[['Supp7B']] +``` + + + + +## Supp8: discriminating endothelial cells +```{r Supp8, fig.height=8, fig.width=11, message=FALSE, cache=FALSE, eval=TRUE} +EC_ord = c('Endocardium', 'other Endothelium', 'other') +EC_cols = c("#F90026", "#8DB5CE", '#e6e6e6') +names(EC_cols) = EC_ord + +d = anndata::read_h5ad(preds_f[['sagenet']])$obsm$dist_map %>% + as('sparseMatrix') %>% + .[cell_idx, cell_idx] +colnames(d) = rownames(d) = colnames(sce) +CMC_ind = meta_dt[cell_type == 'Cardiomyocytes']$cell_id +EC_ind = meta_dt[cell_type == 'Endothelium']$cell_id +d_sub = d[CMC_ind, EC_ind] +EC_true_ind = EC_ind[colSums(d_sub < quantile(d_sub, 0.05)) > 0] +EC_false_ind = EC_ind[colSums(d_sub < quantile(d_sub, 0.05)) == 0] +assays(sce)[['logcounts']] = assays(sce)[[1]] +fm = scran::findMarkers(sce[, c(EC_true_ind, EC_false_ind)], groups=c(rep('TT', length(EC_true_ind)), rep('FF', length(EC_false_ind)))) %>% .$TT +fm$gene_id = rownames(fm) +fm$diff_exp = ifelse(fm$Top > 5, NA, ifelse(fm$logFC.FF > 0, 'Endocardium', 'other Endothelium')) +m_dt = meta_dt[, + .(n_MC = ifelse(cell_id %in% ..EC_true_ind, 'Endocardium', + ifelse(cell_id %in% ..EC_false_ind, 'other Endothelium', 'other')), + sagenet_1, sagenet_2, x, y, ent_plot, cell_id)] + +types = copy(meta_dt)[!(cell_type %in% c('Cardiomyocytes', 'Endothelium')), cell_type := 'other']$cell_type +ord = order(-as.numeric(factor(types, ordered=TRUE, levels=c('Endothelium', 'Cardiomyocytes', 'other')))) +types %<>% .[ord] + +types_MC = copy(m_dt)[!(n_MC %in% c('Endocardium', 'other Endothelium')), n_MC := 'other']$n_MC +ord_MC = order(-as.numeric(factor(types_MC, ordered=TRUE, levels=EC_ord))) +types_MC %<>% .[ord_MC] + +fig_list[['Supp8A']] = plot_2d( + dim_df = meta_dt[ord, .(sagenet_1, sagenet_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = FALSE, + title = '', + sz = 2.5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '', alpha = 'confidence', color = 'cell type') + +fig_list[['Supp8B']] = plot_2d( + dim_df = m_dt[ord_MC, .(sagenet_1, sagenet_2)], + labels = types_MC, + label_cols = EC_cols, + hide_legend = FALSE, + title = '', + sz = 2.5, + alpha = m_dt$ent_plot, +) + labs(x = '', y = '', alpha='confidence', color= 'sub-type') + +# EC_CMC_2d + EC_2d + plot_layout(guides='collect') + +fig_list[['Supp8C']] = m_dt[cell_id %in% EC_ind] %>% + ggplot + + aes(x = n_MC, y = ent_plot, fill=n_MC) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=EC_cols, na.value='#e6e6e6') + + labs(y='confidence score', fill = '', x= '') + + theme(legend.position='none') + +fig_list[['Supp8D']] = data.frame(fm) %>% + ggplot + + aes(x= logFC.FF, y=-log10(p.value), color=diff_exp , label=ifelse(!is.na(diff_exp), gene_id, NA)) + + scale_color_manual(values=EC_cols, na.value='#e6e6e6') + + geom_point() + + theme_bw() + + theme(legend.position='none') + + geom_text() + + labs(x = 'logFC (Endocardium - other End.)') + + xlim(c(-1.5, 1.5)) + +fig_list[['Supp8']] = + fig_list[['Supp8A']] + + plot_spacer() + + fig_list[['Supp8B']] + + fig_list[['Supp8C']] + + plot_spacer() + + fig_list[['Supp8D']] + + plot_layout(guides='collect', ncol=3, widths=c(4, 1, 4, 4, 1, 4)) + +fig_list[['Supp8']] +``` + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/Fig2_Supp1_to_Supp8.rmd b/analysis/Fig2_Supp1_to_Supp8.rmd new file mode 100644 index 0000000..d5f00da --- /dev/null +++ b/analysis/Fig2_Supp1_to_Supp8.rmd @@ -0,0 +1,891 @@ +--- +title: "Spatial Mouse Atlas - Seqfish - Mouse Embryo, Figure 2, Supp. Fig. 1-8" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/Fig2_Supp1_to_Supp8.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'seqfish_mouse_embryo' +ref_tag = 'embryo1_2' +out_dir = file.path('output', tag) +obj_dir = 'int_data/seqfish_mouse_embryo/embryo1_2' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/seqfish_mouse_embryo/embryo1_2' +dir.create(fig_dir) +fig_list = list() + +methods = list.files(out_dir) +names(methods) = methods +preds_f = methods %>% + map(~file.path(out_dir, .x, paste0(paste('preds', ref_tag, 'query_data', sep='_'), '.h5ad'))) +genes_f = file.path(obj_dir, 'gene_dt_embryo1_2.txt') +imp_f = file.path(obj_dir, 'imp_embryo1_2.txt') +col_dt_f = file.path(obj_dir, 'col_dt_embryo1_2.txt') +adj_f = 'models/seqfish_mouse_embryo/embryo1_2/embryo1_2_leiden_0.1.h5ad' +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +n_markers = 3 +overwrite = FALSE +n_PCs = 30 +n_PCs_markers = 10 +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +sce = preds_f[['sagenet']] %>% readH5AD +cell_idx = which(sce$embryo != 'scRNAseq') +sce %<>% .[, cell_idx] +meta_dt = colData(sce) %>% as.data.table +gene_names = rownames(sce) +adj = assays(readH5AD(adj_f))[[1]] +rownames(adj) = colnames(adj) = gene_names +adj = adj[rowSums(adj) != 0, rowSums(adj) != 0] + + +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + preds_list = list() + preds_list[['sagenet']] = anndata::read_h5ad(preds_f[['sagenet']])$obsm$dist_map + adj = anndata::read_h5ad(preds_f[['sagenet']])$varm$adj + preds_list[['sagenet']] %<>% as('sparseMatrix') %>% .[cell_idx, cell_idx] + ## Novosparc predictions + preds_list[['novosparc']] = preds_f[['novosparc']] %>% read_preds_novosparc %>% .[cell_idx, ] + gc() + # Tangram predictions + preds_list[['tangram_cells']] = preds_f[['tangram_cells']] %>% read_preds_tangram %>% .[cell_idx, ] + gc() + + +gc() +} + +imp = imp_f %>% fread %>% setnames('V1', 'GENE') +genes = imp$GENE +imp %<>% .[,-1] %>% as.matrix +rownames(imp) = genes +imp_abs = abs(imp) +# imp = apply(imp, 2, function(x) (x)/(max(x))) +imp_dt = apply(imp, 2, function(x) rownames(imp)[order(-x)[1:n_markers]]) %>% + as.data.table +markers = imp_dt %>% unlist %>% setdiff(c()) +print(markers) +col_dt = col_dt_f %>% fread +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + map_2d[['tangram_cells']] = preds_list[['tangram_cells']] + map_2d[['True Space']] = as.matrix(meta_dt[, .(x,y)]) + rownames(map_2d[['True Space']]) = meta_dt$cell_id + map_2d[['novosparc']] = preds_list[['novosparc']] + + # sagenet distances + d[['sagenet']] = preds_list[['sagenet']] %>% as.dist + d_mtx[['sagenet']] = d[['sagenet']] %>% as.matrix + rownames(d_mtx[['sagenet']]) = colnames(d_mtx[['sagenet']]) = colnames(sce) + map_2d[['sagenet']] = Rtsne(d_mtx[['sagenet']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['sagenet']]) = colnames(sce) + meta_dt[, `:=`(sagenet_1 = ..map_2d[['sagenet']][cell_id, 1], + sagenet_2 = ..map_2d[['sagenet']][cell_id, 2])] + + # distances based on expression values + expr_pcs = prcomp(assays(sce)[[1]])$rotation[,1:10] + d[['exprs']] = dist(expr_pcs) %>% as.dist + d_mtx[['exprs']] = d[['exprs']] %>% as.matrix + rownames(d_mtx[['exprs']]) = colnames(d_mtx[['exprs']]) = colnames(sce) + # map_2d[['exprs']] = uwot::umap(d[['exprs']], n_neighbors=50) + map_2d[['exprs']] = Rtsne(d_mtx[['exprs']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['exprs']]) = colnames(sce) + meta_dt[, `:=`(expr_tsne_1 = ..map_2d[['exprs']][cell_id, 1], + expr_tsne_2 = ..map_2d[['exprs']][cell_id, 2])] + + # distances in the marker-subset space + markers_pcs = prcomp(assays(sce[markers,])[[1]])$rotation[,1:5] + d[['sagenet_markers']] = dist(markers_pcs) %>% as.dist + d_mtx[['sagenet_markers']] = d[['sagenet_markers']] %>% as.matrix + rownames(d_mtx[['sagenet_markers']]) = colnames(d_mtx[['sagenet_markers']]) = colnames(sce) + # map_2d[['sagenet_markers']] = uwot::umap(d[['sagenet_markers']], n_neighbors=50) + map_2d[['sagenet_markers']] = Rtsne(d_mtx[['sagenet_markers']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['sagenet_markers']]) = colnames(sce) + meta_dt[, `:=`(markers_tsne_1 = ..map_2d[['sagenet_markers']][cell_id, 1], + markers_tsne_2 = ..map_2d[['sagenet_markers']][cell_id, 2])] + + # tangram + d[['tangram_cells']] = dist(preds_list[['tangram_cells']]) + d_mtx[['tangram_cells']] = d[['tangram_cells']] %>% as.matrix + rownames(d_mtx[['tangram_cells']]) = colnames(d_mtx[['tangram_cells']]) = meta_dt$cell_id + rownames(map_2d[['tangram_cells']]) = colnames(sce) + meta_dt[, `:=`(tangram_1 = ..map_2d[['tangram_cells']][cell_id, 1], + tangram_2 = ..map_2d[['tangram_cells']][cell_id, 2])] + + d[['novosparc']] = dist(preds_list[['novosparc']]) + d_mtx[['novosparc']] = d[['novosparc']] %>% as.matrix + rownames(d_mtx[['novosparc']]) = colnames(d_mtx[['novosparc']]) = meta_dt$cell_id + rownames(map_2d[['novosparc']]) = colnames(sce) + meta_dt[, `:=`(novosparc_1 = ..map_2d[['novosparc']][cell_id, 1], + novosparc_2 = ..map_2d[['novosparc']][cell_id, 2])] + + # true physical distances + d_true = dist(meta_dt[, .(x,y)]) + d_mtx_true = d_true %>% as.matrix + rownames(d_mtx_true) = colnames(d_mtx_true) = meta_dt$cell_id + + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_dt + obj_list[['d']] = d + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_dt = readRDS(file.path(obj_dir, 'meta_dt.Rds')) + # d = readRDS(file.path(obj_dir, 'd.Rds')) +} +``` + +## Cell-Cell affinity scores +```{r calc_c2c, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_list.Rds')) +){ + ccc_list = list() + # True physical space + for(e in levels(factor(meta_dt$embryo))){ + cells = meta_dt[embryo == e]$cell_id + g_out = map_2d[['True Space']] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[['True Space']][[e]] = cellCellContact( + sce = sce[, cells], + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID='cell_id') + } + + # Predicted spaces + + for(meth in setdiff(names(map_2d), 'True Space')){ + # for(meth in 'sagenet_markers'){ + ccc_list[[meth]] = list() + for(e in levels(factor(meta_dt$embryo))){ + cells = meta_dt[embryo == e]$cell_id + g_out = map_2d[[meth]] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[[meth]][[e]] = cellCellContact( + sce = sce[, cells], + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID ='cell_id') + } + } + + obj_list[['ccc_list']] = ccc_list +}else{ + ccc_list = readRDS(file.path(obj_dir, 'ccc_list.Rds')) +} +ccc_dist = list() +for(meth in setdiff(method_ord, 'True Space')){ + ccc_dist[[meth]] = c() + for(e in levels(factor(meta_dt$embryo))){ + m1 = ccc_list[[meth]][[e]]$pmat + m2 = ccc_list[['True Space']][[e]]$pmat + ccc_dist[[meth]][e] = norm(m1 - m2 , '2') + } +} + +ccc_plot_list = names(ccc_list) %>% + map(~ccc_list[[.x]][[5]]$pmat %>% cellCellContactHeatmapTriangle(celltype_colours, title=method_code[.x])) + +# Scores' data table +ccc_dt = names(ccc_dist) %>% + map(~names(ccc_dist[[.x]]) %>% map(function(y) + data.table(method=.x, embryo=y, corr=ccc_dist[[.x]][y]) + ) %>% purrr::reduce(rbind) + ) %>% + purrr::reduce(rbind) + + +``` + + +## Correlation between the true distance matrix and the prediction distance matrix +```{r calc_cor, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'cor_list.Rds')) +){ + cor_list = list() + for(meth in setdiff(names(map_2d), 'True Space')){ + + # for(meth in 'sagenet_markers'){ + cor_list[[meth]] = c() + for(e in levels(factor(meta_dt$embryo))){ + cells = meta_dt[embryo == e]$cell_id + cor_list[[meth]][e] = cor(c(d_mtx_true[cells, cells]), c(d_mtx[[meth]][cells, cells]), method='spearman') + } + } + + obj_list[['cor_list']] = cor_list + gc() +}else{ + cor_list = readRDS(file.path(obj_dir, 'cor_list.Rds')) +} + +cor_dt = names(cor_list) %>% + map(~data.table(method=.x, embryo=names(cor_list[[.x]]), corr=cor_list[[.x]])) %>% + purrr::reduce(rbind) +``` + +## Distribution of the lisi scores +```{r calc_lisi, message=FALSE, cache=FALSE} +preds_list = list() +if(overwrite | + !file.exists(file.path(obj_dir, 'lisi_vals.Rds')) +){ + preds_list[['sagenet']] = map_2d[['sagenet']] + preds_list[['exprs']] = map_2d[['exprs']] + preds_list[['sagenet_markers']] = map_2d[['sagenet_markers']] + preds_list[['True Space']] = meta_dt[, .(x,y)] + preds_list[['novosparc']] = map_2d[['novosparc']] + preds_list[['tangram_cells']] = map_2d[['tangram_cells']] + # lisi_list = list() + lisi_vals = names(preds_list) %>% + map(~compute_lisi(preds_list[[.x]], meta_dt, c("class_", "cell_type")) %>% + as.data.table %>% + .[, `:=`(cell_id = ..meta_dt$cell_id, method = .x)]) %>% + purrr::reduce(rbind) + obj_list[['lisi_vals']] = lisi_vals +}else{ + lisi_vals = readRDS(file.path(obj_dir, 'lisi_vals.Rds')) +} +lisi_vals = dcast(lisi_vals, cell_id ~ method, value.var='cell_type' ) %>% + .[, (setdiff(names(.), 'cell_id')) := lapply(.SD, function(x) x/.SD$`True Space`), .SDcols = setdiff(names(.), 'cell_id')] %>% + melt(variable.name='method', value.name='LISI') %>% + .[method != 'True Space'] +methods = setdiff(unique(lisi_vals$method), 'True Space') +names(methods) = methods +lisi_vals %<>% setkey(cell_id) +lisi_vals = lisi_vals[order(cell_id)] %>% + .[meta_dt[, .(cell_id, cell_type, embryo)]] +``` + + + +## 2A: E1L1 (ref): cell types + partitionings +```{r 2A, fig.height=3, fig.width=10, message=FALSE, cache=FALSE} +p = plot_spatial(col_dt[, .(x, -y)], labels=factor(as.character(col_dt$cell_type), ordered=TRUE), label_cols=celltype_colours, hide_legend = TRUE, title = 'cell types') +coord_fixed() +p1 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.01), hide_legend = TRUE, title='partitioning 1')+coord_fixed() +p2 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.05), hide_legend = TRUE, title='partitioning 2')+coord_fixed() +p3 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.1), hide_legend = TRUE, title='partitioning 3')+coord_fixed() +p4 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_0.5), hide_legend = TRUE, title='partitioning 4')+coord_fixed() +p5 = plot_spatial(col_dt[, .(x, -y)], labels=factor(col_dt$leiden_1), hide_legend = TRUE, title='partitioning 5')+coord_fixed() + +fig_list[['A']] = p + p1 + p2 + p3 + p4 + p5 + plot_layout(nrow=1) +fig_list[['A']] +# plist +``` + +## 2B-D: Benchmark plots; dist. correlation, lisi scores, and cell-cell affinity dist. +```{r 2B_D, fig.height=8, fig.width=30, message=FALSE, cache=FALSE} +fig_list[['2B']] = cor_dt[method != 'sagenet_markers'] %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space', 'sagenet_markers')), + labels=method_code[!names(method_code) %in% c('True Space', 'sagenet_markers')]) + + labs(color='method', y = "Spearman's rank correlation", x = 'query dataset') + + theme_bw() + + theme(text = element_text(size = 30)) + + theme(axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) + +fig_list[['2C']] = ccc_dt[method != 'True Space' & method != 'sagenet_markers'] %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + # geom_bar(stat="identity", position=position_dodge()) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space', 'sagenet_markers')), + labels=method_code[!names(method_code) %in% c('True Space', 'sagenet_markers')]) + + labs(color='method', y = "cell-type affinity distance", x = 'query dataset') + + theme_bw() + + theme(legend.position='none', + text = element_text(size = 30), + axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) + + +fig_list[['2D']] = lisi_vals[method != 'sagenet_markers'] %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=unique(lisi_vals[method != 'sagenet_markers']$method)) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x= 'method') + + scale_color_manual(values=method_cols, na.value='#e6e6e6') + + theme(text = element_text(size = 30), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=method_code) + +fig_list[['2B-D']] = + fig_list[['2B']] + + plot_spacer() + + fig_list[['2C']] + + plot_spacer() + + fig_list[['2D']] + + plot_layout(widths=c(9, 1, 9, 1, 8), ncol = 5, guides='collect') +fig_list[['2B-D']] +``` + + +## Supp2A-C: Benchmark plots; dist. correlation, lisi scores, and cell-cell affinity dist. +```{r Supp2A_C, fig.height=7, fig.width=31, message=FALSE, cache=FALSE} +fig_list[['Supp2A']] = cor_dt %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "Spearman's rank correlation", x = 'query dataset') + + theme_bw() + + theme(text = element_text(size = 30)) + + theme(axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) + +fig_list[['Supp2B']] = ccc_dt[method != 'True Space'] %>% + ggplot + + aes(x = embryo, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + # geom_bar(stat="identity", position=position_dodge()) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "cell-type affinity distance", x = 'query dataset') + + theme_bw() + + theme(legend.position='none', + text = element_text(size = 30), + axis.text.x=element_text(angle = 45, hjust = 1)) + + scale_x_discrete(labels=embryo_code) + + +fig_list[['Supp2C']] = lisi_vals %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=unique(lisi_vals$method)) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x= 'method') + + scale_color_manual(values=method_cols, na.value='#e6e6e6') + + theme(text = element_text(size = 30), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=method_code) + +fig_list[['Supp2']] = + fig_list[['Supp2A']] + + plot_spacer() + + fig_list[['Supp2B']] + + plot_spacer() + + fig_list[['Supp2C']] + + plot_layout(width=c(9, 1, 9, 1, 9), ncol = 5, guides='collect') +fig_list[['Supp2']] +``` + +## 2E&Supp2D: UMAP plosts of expression space and the reconstructed space +```{r 2E_Supp2D, fig.height=10, fig.width=15, message=FALSE, cache=FALSE, eval=TRUE} +meta_dt %<>% .[, .SD[sample(.N)]] +meta_dt[, ent := (ent_embryo1_2_leiden_1 + ent_embryo1_2_leiden_0.1 + ent_embryo1_2_leiden_0.01 + ent_embryo1_2_leiden_0.5 + ent_embryo1_2_leiden_0.05)/5 ] +meta_dt$ent = meta_dt$ent_plot = (1 - meta_dt$ent/(max(meta_dt$ent)))^2 +exprs_p = plot_2d( + dim_df = meta_dt[, .(expr_tsne_1, expr_tsne_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Expression', + sz = 2.5, +) + labs(x = '', y = '') + + +markers_p = plot_2d( + dim_df = meta_dt[, .(markers_tsne_1, markers_tsne_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet-SIG', + sz = 2.5 +) + labs(x = '', y = '') + +sagenet_p = plot_2d( + dim_df = meta_dt[, .(sagenet_1, sagenet_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet', + sz = 2.5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '') + +lgened1 = (plot_2d( + dim_df = meta_dt[order(cell_type), .(sagenet_1, sagenet_2)], + labels = meta_dt[order(cell_type)]$cell_type, + label_cols = celltype_colours, + hide_legend = FALSE, + title = 'SageNet', + sz = 5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '', color = 'cell type')+ theme(text = element_text(size = 12)) ) %>% get_legend() %>% + as_ggplot() + +legend2 = ( + ggplot(meta_dt) + + aes(x = 1, y = 1, alpha=ent_plot)+ + geom_point(size=3) + + labs(alpha='confidence score') + theme(text = element_text(size = 8)) ) %>% get_legend() %>% + as_ggplot() + +novosparc_p = plot_2d( + dim_df = meta_dt[, .(novosparc_1, novosparc_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'novoSpaRc', + sz = 2.5 +) + labs(x = '', y = '') + +tangram_p = plot_2d( + dim_df = meta_dt[, .(tangram_1, tangram_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Tangram', + sz = 2.5 +) + labs(x = '', y = '') + +fig_list[['2E']] = + sagenet_p + + exprs_p + + lgened1 + + novosparc_p + + tangram_p + + legend2 + + plot_layout(ncol = 3, widths=c(5, 5, 5, 5, 5, 5)) +fig_list[['2E']] + +fig_list[['Supp2D']] = + sagenet_p + + markers_p + + exprs_p + + novosparc_p + + tangram_p + + lgened1 + + plot_layout(ncol = 3, widths=c(5, 5, 5, 5, 5, 5)) +fig_list[['Supp2D']] +``` + +## Supp1: GIN on Embryo1 layer 1 +```{r Supp1, fig.height=7.5, fig.width=7.5, message=FALSE, cache=FALSE} +g_obj = graph_from_adjacency_matrix(adj, mode='undirected') +d = cluster_louvain(g_obj) +grp = .palette2[membership(d)] +lay = layout_nicely(g_obj) +graph_col_comm(graph=g_obj, lay=lay, grp=grp, title='Gene interaction network on embryo 1 layer 1 (E1_L1)', labels=rownames(adj)) +graph_col_comm(graph=g_obj, lay=lay, grp=ifelse(rownames(adj) %in% markers, grp, '#e6e6e6'), title='Spatially informative genes (SIG)', labels=ifelse(rownames(adj) %in% markers, rownames(adj), '')) +``` + +## Supp3: Cell-type contact maps +```{r Supp3, fig.height=20, fig.width=20, message=FALSE, cache=FALSE} +fig_list[['Supp3']] = ccc_plot_list +fig_list[['Supp3']] +``` + + +## Supp4: LISI scores by cell type and embryo +```{r Supp4, fig.height=30, fig.width=50, message=FALSE, cache=FALSE} +fig_list[['Supp4A']] = lisi_vals %>% + ggplot + + aes(x = factor(embryo, ordered=TRUE, levels=embryo_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x = 'query dataset') + + theme(text = element_text(size = 50), + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=embryo_code) + +fig_list[['Supp4B']] = lisi_vals %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE), y = log2(LISI), color=factor(cell_type, ordered=TRUE), fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x = 'cell type') + + theme(text = element_text(size = 50), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + +fig_list[['Supp4']] = + fig_list[['Supp4A']] + + plot_spacer() + + fig_list[['Supp4B']] + + plot_layout(ncol=1, heights=c(14, 2, 14), guides='collect') +fig_list[['Supp4']] +``` + + +## Supp5: Spatially informative genes (SIGs) +```{r Supp5, fig.height=4, fig.width=5, message=FALSE, cache=FALSE, eval=TRUE} + +for(m in markers){ + + p1 = plot_2d_cont( + dim_df = meta_dt[embryo == 'embryo1_2', .(x, -y)], + labels = assays(sce)[[1]][m, meta_dt[embryo == 'embryo1_2']$cell_id], + # label_cols = celltype_colours, + hide_legend = FALSE, + title = '', + sz = 1.5 + # alpha = meta_dt$ent, + ) + labs(x = '', y = '', color='') + + coord_fixed() + + theme_void() + + theme(legend.position='none') + + p2 = plot_2d_cont( + dim_df = meta_dt[, .(sagenet_1, sagenet_2)], + labels = assays(sce)[[1]][m, meta_dt$cell_id], + # label_cols = celltype_colours, + hide_legend = FALSE, + title = '', + sz = 1.5 + # alpha = meta_dt$ent, + ) + labs(x = '', y = '', color='') + + theme_void() + + theme(legend.position='none') + + print(p1 + p2 + plot_annotation(title = m, theme = theme(plot.title = element_text(size = 30, hjust = 0.5, face='italic')))) + +} +``` + +## Supp6: Co-localization of gut tube with Splanchnic and Lateral plate mesoderms +```{r Supp6, fig.height=4, fig.width=13, message=FALSE, cache=FALSE, eval=TRUE} + +types = copy(meta_dt)[!(cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm')), cell_type := 'other']$cell_type +ord = order(-as.numeric(types)) +types %<>% .[ord] + +sagenet_p = plot_2d( + dim_df = meta_dt[ord, .(sagenet_1, sagenet_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet', + sz = 1.5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '') + coord_fixed() + +exprs_p = plot_2d( + dim_df = meta_dt[ord, .(expr_tsne_1, expr_tsne_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Expression', + sz = 1.5, + alpha=2 +) + labs(x = '', y = '') + + +lgened = (plot_2d( + dim_df = meta_dt[ord, .(sagenet_1, sagenet_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = FALSE, + title = 'SageNet', + sz = 5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '', color='cell type')+ theme(text = element_text(size = 15)) ) %>% get_legend() %>% + as_ggplot() + + +p = plot_spatial(col_dt[, .(x, -y)], + labels=factor(ifelse(col_dt$cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm'), col_dt$cell_type, NA)), + label_cols=celltype_colours, + hide_legend = TRUE, + title = 'Embryo 1 layer 1 (E1_L1)', + sz=1) +coord_fixed() + +fig_list[['supp6']] = + sagenet_p + + exprs_p + + p + + lgened + + plot_layout(ncol = 4, widths=c(4, 4, 2.5, 2.5)) +fig_list[['supp6']] + +``` + + + +## Supp7: Confidence scores +```{r Supp7, fig.height=8, fig.width=8, message=FALSE, cache=FALSE} +meta_dt[, ent := (ent_embryo1_2_leiden_1 + ent_embryo1_2_leiden_0.1 + ent_embryo1_2_leiden_0.01 + ent_embryo1_2_leiden_0.5 + ent_embryo1_2_leiden_0.05)/5 ] +meta_dt$ent = (1 - meta_dt$ent/(max(meta_dt$ent))) +# tab_dt = col_dt[, .N, by = 'cell_type'] %>% .[order(N), ] +tab_dt_query = copy(meta_dt[, .(ent=median(ent), .N, dataset='query'), by = 'cell_type']) %>% .[order(ent), ] +celltype_ord = unique(tab_dt_query$cell_type) +tab_dt_ref = col_dt[, .(.N, dataset='reference'), by = 'cell_type'] +tab_dt = rbind(tab_dt_ref, tab_dt_query[, .(N, dataset, cell_type)]) + +data_summary <- function(x) { + m <- median(x) + ymin <- m-sd(x) + ymax <- m+sd(x) + return(c(y=m,ymin=ymin,ymax=ymax)) +} + +p1 = meta_dt %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE, levels= celltype_ord), y = ent, color=cell_type) + + # geom_violin() + + # + geom_jitter(height = 0, width = 0.1) + + theme_bw() + + scale_color_manual(values=celltype_colours, na.value='#e6e6e6', drop=TRUE, limits=unique(tab_dt$cell_type)) + + labs(y='confidence score', x= '') + + stat_summary(fun.data=data_summary, size=1.5) + + coord_flip() + + # theme(legend.position='none') + + theme(legend.position='none', axis.ticks.y=element_blank(), axis.text.y=element_blank()) + + +p2 = tab_dt %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE, levels= celltype_ord), y = N, color=dataset) + + # geom_bar(position="dodge", stat="identity") + + geom_point(size = 3) + + theme_bw() + + # scale_fill_manual(values=celltype_colours, na.value='#e6e6e6', drop=TRUE, limits=unique(tab_dt$cell_type)) + + scale_color_manual(values=c('#FB8072', '#1e90ff'), na.value='#e6e6e6') + + labs(y='#cells', x= 'cell type', fill='cell type') + + theme(axis.text.x=element_text(angle = 90, hjust = 1)) + + coord_flip() + + + +p_list = list() +for(emb in c('embryo2_2', 'embryo2_5', 'embryo3_2', 'embryo3_5')){ + p_list[[emb]] = plot_2d( + dim_df = meta_dt[embryo==emb, .(x, -y)], + labels = meta_dt[embryo==emb]$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = embryo_code[emb], + sz = 3, + alpha = meta_dt[embryo==emb]$ent_plot, + ) + labs(x = '', y = '') + coord_fixed() +} + +fig_list[['Supp7A']] = p2 + p1 + plot_layout(ncol = 2, guides='collect') +fig_list[['Supp7A']] + +fig_list[['Supp7B']] = (p_list %>% purrr::reduce(`+`)) + + plot_layout(ncol = 2, guides='collect', widths=c(4, 4, 4, 4)) +fig_list[['Supp7B']] +``` + + + + +## Supp8: discriminating endothelial cells +```{r Supp8, fig.height=8, fig.width=11, message=FALSE, cache=FALSE, eval=TRUE} +EC_ord = c('Endocardium', 'other Endothelium', 'other') +EC_cols = c("#F90026", "#8DB5CE", '#e6e6e6') +names(EC_cols) = EC_ord + +d = anndata::read_h5ad(preds_f[['sagenet']])$obsm$dist_map %>% + as('sparseMatrix') %>% + .[cell_idx, cell_idx] +colnames(d) = rownames(d) = colnames(sce) +CMC_ind = meta_dt[cell_type == 'Cardiomyocytes']$cell_id +EC_ind = meta_dt[cell_type == 'Endothelium']$cell_id +d_sub = d[CMC_ind, EC_ind] +EC_true_ind = EC_ind[colSums(d_sub < quantile(d_sub, 0.05)) > 0] +EC_false_ind = EC_ind[colSums(d_sub < quantile(d_sub, 0.05)) == 0] +assays(sce)[['logcounts']] = assays(sce)[[1]] +fm = scran::findMarkers(sce[, c(EC_true_ind, EC_false_ind)], groups=c(rep('TT', length(EC_true_ind)), rep('FF', length(EC_false_ind)))) %>% .$TT +fm$gene_id = rownames(fm) +fm$diff_exp = ifelse(fm$Top > 5, NA, ifelse(fm$logFC.FF > 0, 'Endocardium', 'other Endothelium')) +m_dt = meta_dt[, + .(n_MC = ifelse(cell_id %in% ..EC_true_ind, 'Endocardium', + ifelse(cell_id %in% ..EC_false_ind, 'other Endothelium', 'other')), + sagenet_1, sagenet_2, x, y, ent_plot, cell_id)] + +types = copy(meta_dt)[!(cell_type %in% c('Cardiomyocytes', 'Endothelium')), cell_type := 'other']$cell_type +ord = order(-as.numeric(factor(types, ordered=TRUE, levels=c('Endothelium', 'Cardiomyocytes', 'other')))) +types %<>% .[ord] + +types_MC = copy(m_dt)[!(n_MC %in% c('Endocardium', 'other Endothelium')), n_MC := 'other']$n_MC +ord_MC = order(-as.numeric(factor(types_MC, ordered=TRUE, levels=EC_ord))) +types_MC %<>% .[ord_MC] + +fig_list[['Supp8A']] = plot_2d( + dim_df = meta_dt[ord, .(sagenet_1, sagenet_2)], + labels = types, + label_cols = celltype_colours, + hide_legend = FALSE, + title = '', + sz = 2.5, + alpha = meta_dt[ord]$ent_plot, +) + labs(x = '', y = '', alpha = 'confidence', color = 'cell type') + +fig_list[['Supp8B']] = plot_2d( + dim_df = m_dt[ord_MC, .(sagenet_1, sagenet_2)], + labels = types_MC, + label_cols = EC_cols, + hide_legend = FALSE, + title = '', + sz = 2.5, + alpha = m_dt[ord_MC]$ent_plot, +) + labs(x = '', y = '', alpha='confidence', color= 'sub-type') + +# EC_CMC_2d + EC_2d + plot_layout(guides='collect') + +fig_list[['Supp8C']] = m_dt[cell_id %in% EC_ind] %>% + ggplot + + aes(x = n_MC, y = ent_plot, fill=n_MC) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=EC_cols, na.value='#e6e6e6') + + labs(y='confidence score', fill = '', x= '') + + theme(legend.position='none') + +fig_list[['Supp8D']] = data.frame(fm) %>% + ggplot + + aes(x= logFC.FF, y=-log10(p.value), color=diff_exp , label=ifelse(!is.na(diff_exp), gene_id, NA)) + + scale_color_manual(values=EC_cols, na.value='#e6e6e6') + + geom_point() + + theme_bw() + + theme(legend.position='none') + + geom_text() + + labs(x = 'logFC (Endocardium - other End.)') + + xlim(c(-1.5, 1.5)) + +fig_list[['Supp8']] = + fig_list[['Supp8A']] + + plot_spacer() + + fig_list[['Supp8B']] + + fig_list[['Supp8C']] + + plot_spacer() + + fig_list[['Supp8D']] + + plot_layout(guides='collect', ncol=3, widths=c(4, 1, 4, 4, 1, 4)) + +fig_list[['Supp8']] +``` + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/Fig3A.rmd b/analysis/Fig3A.rmd new file mode 100644 index 0000000..3871341 --- /dev/null +++ b/analysis/Fig3A.rmd @@ -0,0 +1,315 @@ +--- +title: "Spatial Mouse Atlas - Seqfish - Mouse Embryo, Figure 3A" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/Fig3A.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'seqfish_mouse_embryo/sagenet' +ref_tags = c('embryo1_2', 'embryo2_2', 'embryo3_2') +out_dir = file.path('output', tag) +obj_dir = 'int_data/seqfish_mouse_embryo/multi' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/seqfish_mouse_embryo/multi' +dir.create(fig_dir) +fig_list = list() + +names(ref_tags) = ref_tags +preds_f = ref_tags %>% map(~file.path(out_dir, sprintf('preds_%s_query_data.h5ad', .x))) +col_dt_f = file.path('int_data/seqfish_mouse_embryo/', 'embryo1_2', 'col_dt_embryo1_2.txt') +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +overwrite = FALSE +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +sce = preds_f[['embryo1_2']] %>% readH5AD +cell_idx = which(sce$embryo != 'scRNAseq') +sce_query = sce[, cell_idx] +meta_query = colData(sce_query) %>% as.data.table +rm(sce) +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + # Gene importanc + preds_list = ref_tags %>% purrr::map(~(anndata::read_h5ad(preds_f[[.x]])$obsm$dist_map) %>% as('sparseMatrix') %>% .[cell_idx, cell_idx]) +gc() +} +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + map_2d[['true']] = as.matrix(meta_query[, .(x,y)]) + rownames(map_2d[['true']]) = meta_query$cell_id + + # sagenet distances + for(i in ref_tags){ + d[[i]] = preds_list[[i]] %>% as.dist + d_mtx[[i]] = d[[i]] %>% as.matrix + d_mtx[[i]] = d_mtx[[i]]/norm(d_mtx[[i]]) + rownames(d_mtx[[i]]) = colnames(d_mtx[[i]]) = colnames(sce_query) + map_2d[[i]] = Rtsne(d_mtx[[i]], is_distance=T, perplexity=20)$Y + # map_2d[[i]] = uwot::umap(d[[i]]) + rownames(map_2d[[i]]) = colnames(sce_query) + meta_query[[paste0(i, '_1')]] = map_2d[[i]][meta_query$cell_id, 1] + meta_query[[paste0(i, '_2')]] = map_2d[[i]][meta_query$cell_id, 2] + # meta_query[, `:=`(i = ..map_2d[[i]][cell_id, 1], + # i = ..map_2d[[i]][cell_id, 2])] + preds_list[[i]] = NULL + } + + d_mtx[['all']] = (d_mtx %>% purrr::reduce(`+`))/3 + rownames(d_mtx[['all']]) = colnames(d_mtx[['all']]) = colnames(sce_query) + map_2d[['all']] = Rtsne(d_mtx[['all']], is_distance=T, perplexity=20)$Y + # map_2d[['all']] = uwot::umap(as.dist(d[[i]])) + rownames(map_2d[['all']]) = colnames(sce_query) + meta_query[, `:=`(sagenet_all_1 = ..map_2d[['all']][cell_id, 1], + sagenet_all_2 = ..map_2d[['all']][cell_id, 2])] + + # true physical distances + d_true = dist(meta_query[, .(x,y)]) + d_mtx_true = d_true %>% as.matrix + rownames(d_mtx_true) = colnames(d_mtx_true) = meta_query$cell_id + + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_query + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_query = readRDS(file.path(obj_dir, 'meta_dt.Rds')) +} + +methods = c('true', 'all', 'embryo3_2', 'embryo2_2', 'embryo1_2') +method_code = c( + 'true' = 'True Space', + 'all' = 'Ensemble', + 'embryo3_2' = 'E3_L1', + 'embryo2_2' = 'E2_L1', + 'embryo1_2' = 'E1_L1' +) +method_cols = c('#E1C239', "#1965B0", '#FF7F00', "#FDB462", "#FB8072") +method_ord = names(method_cols) = names(methods) = methods +``` + +## Correlation between the true distance matrix and the prediction distance matrix +```{r calc_cor, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'cor_list.Rds')) +){ + cor_list = list() + for(meth in setdiff(names(map_2d), 'true')){ + + # for(meth in 'sagenet_markers'){ + cor_list[[meth]] = c() + for(e in levels(factor(meta_query$embryo))){ + cells = meta_query[embryo == e]$cell_id + cor_list[[meth]][e] = cor(c(d_mtx_true[cells, cells]), c(d_mtx[[meth]][cells, cells]), method='spearman') + } + } + + obj_list[['cor_list']] = cor_list + gc() +}else{ + cor_list = readRDS(file.path(obj_dir, 'cor_list.Rds')) +} + +cor_dt = names(cor_list) %>% + map(~data.table(method=.x, embryo=names(cor_list[[.x]]), corr=round(cor_list[[.x]],digits=2))) %>% + purrr::reduce(rbind) %>% + .[, rank:=dense_rank(corr), by='embryo'] + +``` + +## Cell2Cell afinity scores +```{r calc_c2c, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_list.Rds')) +){ + ccc_list = list() + # True physical space + for(e in levels(factor(meta_query$embryo))){ + cells = meta_query[embryo == e]$cell_id + g_out = map_2d[['true']] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[['true']][[e]] = cellCellContact( + sce = sce_query[, cells], + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID='cell_id') + } + + # Predicted spaces + + for(meth in setdiff(names(map_2d), 'true')){ + # for(meth in 'sagenet_markers'){ + ccc_list[[meth]] = list() + for(e in levels(factor(meta_query$embryo))){ + cells = meta_query[embryo == e]$cell_id + g_out = map_2d[[meth]] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[[meth]][[e]] = cellCellContact( + sce = sce_query[, cells], + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID ='cell_id') + } + } + + obj_list[['ccc_list']] = ccc_list +}else{ + ccc_list = readRDS(file.path(obj_dir, 'ccc_list.Rds')) +} +ccc_dist = list() +for(meth in setdiff(names(map_2d), 'true')){ + ccc_dist[[meth]] = c() + for(e in levels(factor(meta_query$embryo))){ + m1 = ccc_list[[meth]][[e]]$pmat + m2 = ccc_list[['true']][[e]]$pmat + ccc_dist[[meth]][e] = norm(m1 - m2 , '2') + } +} + +# Scores' data table +ccc_dt = names(ccc_dist) %>% + map(~names(ccc_dist[[.x]]) %>% map(function(y) + data.table(method=.x, embryo=y, corr=round(ccc_dist[[.x]][y],digits=2)) + ) %>% purrr::reduce(rbind) + ) %>% + purrr::reduce(rbind) %>% + .[, rank:=dense_rank(corr), by='embryo'] + + +``` + +## 3A: Benchmark plots; dist. correlation, cell2cell affinity dist. +```{r 3A, fig.height=4, fig.width=9, message=FALSE, cache=FALSE} +colblu <- colorRampPalette(c("white", "#00bfff")) + +fig_list[['3A_left']] = cor_dt %>% + ggplot + + aes(x = embryo, fill=rank, y=factor(method, ordered=TRUE, levels=method_ord)) + + scale_fill_gradientn(colors = colblu(10), breaks = c(1, 4), labels = c("low", "high"))+ + geom_tile() + + geom_text(aes(label=round(corr,digits=2))) + + theme_minimal() + + theme(plot.title = element_text(hjust = 0.5), + legend.position='bottom', + axis.text.x=element_text(angle = 45, hjust = 1), + text = element_text(size = 15)) + + labs(title="Spearman's rank correlation", x='query dataset', y= 'reference dataset', fill="") + + scale_x_discrete(labels=embryo_code) + + scale_y_discrete(labels=method_code) + +colred <- colorRampPalette(c("#A10037", "white")) +fig_list[['3A_right']] = ccc_dt %>% + ggplot + + aes(x = embryo, fill=rank, y=factor(method, ordered=TRUE, levels=method_ord)) + + scale_fill_gradientn(colors = colred(10), breaks = c(1, 4), labels = c("low", "high"))+ + geom_tile() + + geom_text(aes(label=round(corr,digits=2))) + + theme_minimal() + + theme(plot.title = element_text(hjust = 0.5), + legend.position='bottom', + axis.text.x=element_text(angle = 45, hjust = 1), + axis.ticks.y=element_blank(), + axis.text.y=element_blank(), + text = element_text(size = 15)) + + labs(title="cell-type affinity distance", x='query dataset', y= '', fill="") + + scale_x_discrete(labels=embryo_code) + + +fig_list[['3A']] = fig_list[['3A_left']] + fig_list[['3A_right']] + + plot_layout(ncol = 2) +fig_list[['3A']] +``` + + + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/Fig3B_C_Supp9_Supp10.rmd b/analysis/Fig3B_C_Supp9_Supp10.rmd new file mode 100644 index 0000000..50495d7 --- /dev/null +++ b/analysis/Fig3B_C_Supp9_Supp10.rmd @@ -0,0 +1,439 @@ +--- +title: "Spatial Mouse Atlas - Seqfish - Mouse Embryo, Figure 3B-C and Supp. Fig. 9-10" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/Fig3B_C_Supp9_Supp10.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'seqfish_mouse_embryo/sagenet' +ref_tags = c('embryo1_2', 'embryo2_2', 'embryo3_2') +query_tag = c('scRNAseq', 'embryo1_5', 'embryo2_5', 'embryo3_5') +out_dir = file.path('output', tag) +obj_dir = 'int_data/seqfish_mouse_embryo/all' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/seqfish_mouse_embryo/all' +dir.create(fig_dir) +fig_list = list() + +names(ref_tags) = ref_tags +preds_f = ref_tags %>% map(~file.path(out_dir, sprintf('preds_%s_query_data.h5ad', .x))) +col_dt_f = file.path('int_data/seqfish_mouse_embryo/', 'embryo1_2', 'col_dt_embryo1_2.txt') +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +overwrite = FALSE +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +sce_query = preds_f[['embryo1_2']] %>% readH5AD +cell_idx = which(sce_query$embryo %in% query_tag) +sce_query = sce_query[, cell_idx] +meta_query = colData(sce_query) %>% as.data.table +# rm(sce_query) + +gc() +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + ent = preds_f %>% map(readH5AD) %>% map(get_confidence) %>% purrr::reduce(`+`) %>% .[cell_idx] + meta_query$ent = ent / max(ent) + # Gene importanc + preds_list = ref_tags %>% purrr::map(~(anndata::read_h5ad(preds_f[[.x]])$obsm$dist_map) %>% as('sparseMatrix') %>% .[cell_idx, cell_idx]) +gc() +} + +col_dt = col_dt_f %>% fread +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + map_2d[['true']] = as.matrix(meta_query[, .(x,y)]) + rownames(map_2d[['true']]) = meta_query$cell_id + + # sagenet distances + for(i in ref_tags){ + d[[i]] = preds_list[[i]] %>% as.dist + d_mtx[[i]] = d[[i]] %>% as.matrix + d_mtx[[i]] = d_mtx[[i]]/norm(d_mtx[[i]]) + rownames(d_mtx[[i]]) = colnames(d_mtx[[i]]) = colnames(sce_query) + map_2d[[i]] = Rtsne(d_mtx[[i]], is_distance=T, perplexity=50)$Y + # map_2d[[i]] = uwot::umap(d[[i]]) + rownames(map_2d[[i]]) = colnames(sce_query) + meta_query[[paste0(i, '_1')]] = map_2d[[i]][meta_query$cell_id, 1] + meta_query[[paste0(i, '_2')]] = map_2d[[i]][meta_query$cell_id, 2] + # meta_query[, `:=`(i = ..map_2d[[i]][cell_id, 1], + # i = ..map_2d[[i]][cell_id, 2])] + preds_list[[i]] = NULL + } + + d_mtx[['all']] = (d_mtx %>% purrr::reduce(`+`))/3 + rownames(d_mtx[['all']]) = colnames(d_mtx[['all']]) = colnames(sce_query) + map_2d[['all']] = Rtsne(d_mtx[['all']], is_distance=T, perplexity=50)$Y + # map_2d[['all']] = uwot::umap(as.dist(d[[i]])) + rownames(map_2d[['all']]) = colnames(sce_query) + meta_query[, `:=`(sagenet_all_1 = ..map_2d[['all']][cell_id, 1], + sagenet_all_2 = ..map_2d[['all']][cell_id, 2])] + + d[['exprs']] = dist(t(assays(sce_query)[[1]])) %>% as.dist + d_mtx[['exprs']] = d[['exprs']] %>% as.matrix + rownames(d_mtx[['exprs']]) = colnames(d_mtx[['exprs']]) = colnames(sce_query) + map_2d[['exprs']] = Rtsne(d_mtx[['exprs']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['exprs']]) = colnames(sce_query) + meta_query[, `:=`(expr_tsne_1 = ..map_2d[['exprs']][cell_id, 1], + expr_tsne_2 = ..map_2d[['exprs']][cell_id, 2])] + + d_true = dist(meta_query[, .(x,y)]) + d_mtx_true = d_true %>% as.matrix + rownames(d_mtx_true) = colnames(d_mtx_true) = meta_query$cell_id + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_query + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_query = readRDS(file.path(obj_dir, 'meta_dt.Rds')) +} +``` + + +## 3B: The reconstructed spaces +```{r 3B, fig.height=10, fig.width=12, message=FALSE, cache=FALSE, eval=TRUE} +meta_query %<>% .[, .SD[sample(.N)]] +meta_query$ent = 1 - meta_query$ent/(max(meta_query$ent)) +meta_query$ent_plot = meta_query$ent ^ 2 + +exprs_p = plot_2d( + dim_df = meta_query[, .(expr_tsne_1, expr_tsne_2)], + labels = meta_query$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Expression', + sz = 2.5 +) + labs(x = '', y = '') + theme(text = element_text(size = 15)) + +sagenet_p = plot_2d( + dim_df = meta_query[, .(sagenet_all_1, sagenet_all_2)], + labels = meta_query$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet (Ensemble)', + sz = 2.5, + alpha = meta_query$ent_plot, + label_title = 'cell type' +) + labs(x = '', y = '') + theme(text = element_text(size = 15)) + +exprs_batch_p = plot_2d( + dim_df = meta_query[, .(expr_tsne_1, expr_tsne_2)], + labels = factor(meta_query$embryo, ordered=TRUE, levels=embryo_ord), + label_cols = embryo_cols, + hide_legend = TRUE, + title = '', + sz = 2.5 +) + labs(x = '', y = '') + +sagenet_batch_p = plot_2d( + dim_df = meta_query[, .(sagenet_all_1, sagenet_all_2)], + labels = factor(meta_query$embryo, ordered=TRUE, levels=embryo_ord), + label_cols = embryo_cols, + hide_legend = TRUE, + label_title = 'query dataset', + sz = 2.5, + alpha = meta_query$ent_plot +) + labs(x = '', y = '') + + scale_color_manual(values=embryo_cols, na.value='#e6e6e6', drop=TRUE, + limits=embryo_ord[embryo_ord %in% unique(meta_query$embryo)], + labels=embryo_code) + +batch_legend = (plot_2d( + dim_df = meta_query[, .(sagenet_all_1, sagenet_all_2)], + labels = factor(meta_query$embryo, ordered=TRUE, levels=embryo_ord), + label_cols = embryo_cols, + hide_legend = FALSE, + label_title = 'query dataset', + sz = 4, + alpha = meta_query$ent_plot +) + labs(x = '', y = '') + theme(text = element_text(size = 15)) + + scale_color_manual(values=embryo_cols, na.value='#e6e6e6', drop=TRUE, + limits=embryo_ord[embryo_ord %in% unique(meta_query$embryo)], + labels=embryo_code)) %>% get_legend() %>% + as_ggplot() + + +fig_list[['3B']] = + sagenet_p + + exprs_p + + plot_spacer() + + sagenet_batch_p + + exprs_batch_p + + batch_legend + + plot_layout(ncol = 3, guides = 'collect', widths=c(5, 5, 2, 5, 5, 2)) +fig_list[['3B']] + +``` + +## LISI scores +```{r 3C, fig.height=5, fig.width=5, message=FALSE, cache=FALSE} + +methods = c('exprs', 'SageNet (Ensemble)') +method_cols = c('#666666', "#33A02C") +method_ord = names(method_cols) = names(methods) = methods + +lisi_sagenet = compute_lisi(map_2d[['all']][meta_query$cell_id,], meta_query[, .(cell_type, embryo = ifelse(embryo != + 'scRNAseq', 'seqFISH', 'scRNAseq'))], c('embryo', 'cell_type')) %>% + as.data.table %>% + .[, .(method ='SageNet (Ensemble)', embryo, cell_type)] %>% + melt(id='method') +lisi_exprs = compute_lisi(map_2d[['exprs']][meta_query$cell_id,], meta_query[, .(cell_type, embryo = ifelse(embryo != + 'scRNAseq', 'seqFISH', 'scRNAseq'))], c('embryo', 'cell_type')) %>% + as.data.table %>% + .[, .(method ='exprs', embryo, cell_type)] %>% + melt(id='method') +lisi = rbind(lisi_sagenet, lisi_exprs) %>% + setnames('variable', 'var') + +saganet_embryo = median(lisi[method == 'SageNet (Ensemble)' & var == 'embryo']$value) +print(saganet_embryo) +exprs_embryo = median(lisi[method == 'exprs' & var == 'embryo']$value) +print(exprs_embryo) + +fig_list[['3C']] = lisi %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), + y = log2(value), fill=factor(method, ordered=TRUE, levels=method_ord)) + + facet_wrap(. ~ var, scales="free_y", labeller = labeller(var = c('embryo' = 'query dataset', 'cell_type' = 'cell type'))) + + scale_fill_manual(values=method_cols) + + geom_boxplot() + + theme_bw() + + labs(x='method', y='log2(LISI)') + + theme(legend.position='none') +fig_list[['3C']] +``` + + + +## Supp9: Separation of brain sub-types +```{r Supp9, fig.height=5, fig.width=16, message=FALSE, cache=FALSE, eval=TRUE} +meta_query %>% setkey(cell_id) +brain_cells = meta_query[cell_type=='Forebrain/Midbrain/Hindbrain']$cell_id + +hc = hclust(dist(map_2d[['all']][brain_cells,])^2, "cen") +memb = cutree(hc, k = 10) +memb %<>% ifelse(. < 4, ., 'other (brain)') + +subtype_code = c('1' = 'Forebrain', '2' = 'Hindbrain', '3' = 'Midbrain', 'other (brain)' = 'other brain cells') +subtype_cols = c( + 'Forebrain' = '#00bfff', + 'Midbrain' = '#B17BA6', + 'Hindbrain' = '#FB8072', + 'other brain cells' = '#999999' +) +subtype_ord = names(subtype_cols) + +sagenet_p = plot_2d( + dim_df = meta_query[, .(sagenet_all_1, sagenet_all_2)], + labels = factor(subtype_code[memb[meta_query$cell_id]], ordered=TRUE, levels=subtype_ord), + label_cols = subtype_cols, + hide_legend = TRUE, + title = 'SageNet (Ensemble)', + sz = 2.5, + alpha = meta_query[meta_query$cell_id]$ent_plot, + label_title = 'sub-type' +) + labs(x = '', y = '') + theme(text = element_text(size = 15)) + + +legend_brain = (plot_2d( + dim_df = meta_query[, .(sagenet_all_1, sagenet_all_2)], + labels = factor(subtype_code[memb[meta_query$cell_id]], ordered=TRUE, levels=subtype_ord), + label_cols = subtype_cols, + hide_legend = FALSE, + title = 'SageNet (Ensemble)', + sz = 4, + alpha = meta_query[meta_query$cell_id]$ent_plot, + label_title = 'sub-type' +) + labs(x = '', y = '') + theme(text = element_text(size = 15)) + + scale_color_manual(values=subtype_cols, na.value='#e6e6e6', drop=TRUE, + limits=subtype_ord, + labels=c('NA' = 'other', subtype_ord))) %>% get_legend() %>% + as_ggplot() + +exprs_p = plot_2d( + dim_df = meta_query[, .(expr_tsne_1, expr_tsne_2)], + labels = factor(subtype_code[memb[meta_query$cell_id]], ordered=TRUE, levels=subtype_ord), + label_cols = subtype_cols, + hide_legend = TRUE, + title = 'Expression', + sz = 2.5, + label_title = 'sub-type' +) + labs(x = '', y = '') + theme(text = element_text(size = 15)) + + +seqfish_cells = meta_query[embryo=='embryo1_5']$cell_id +seqfish_p = plot_2d( + dim_df = meta_query[seqfish_cells, .(x, -y)], + labels = factor(subtype_code[memb[seqfish_cells]], ordered=TRUE, levels=subtype_ord), + label_cols = subtype_cols, + hide_legend = TRUE, + title = 'E1_L2', + sz = 2.5, + label_title = 'sub-type' +) + labs(x = '', y = '') + coord_fixed() + theme(text = element_text(size = 15)) + +fig_list[['Supp9']] = + sagenet_p + + exprs_p + + seqfish_p + + legend_brain + + plot_layout(ncol=4, widths=c(5, 5, 4, 2)) +fig_list[['Supp9']] +``` + +## Supp10A: confidence scores +```{r Supp10A, fig.height=9, fig.width=4, message=FALSE, cache=FALSE, eval=TRUE} +meta_query[brain_cells, cell_type := subtype_code[memb[brain_cells]]] +rownames(meta_query) = meta_query$cell_id +colData(sce_query) = DataFrame(meta_query) +colnames(sce_query) = meta_query$cell_id + +tab_dt = copy(meta_query[, .(ent=median(ent), .N, dataset='query'), by = 'cell_type']) %>% .[order(ent), ] +celltype_ord = unique(tab_dt$cell_type) + +data_summary <- function(x) { + m <- median(x) + ymin <- m-sd(x) + ymax <- m+sd(x) + return(c(y=m,ymin=ymin,ymax=ymax)) +} + +fig_list[['Supp10A']] = meta_query[cell_type %in% names(which(table(cell_type) > 20))] %>% + ggplot + + aes(x = factor(cell_type, ordered=TRUE, levels= celltype_ord), y = ent, color=cell_type) + + stat_summary(fun.data=data_summary, size=1.5) + + # + geom_jitter(height = 0, width = 0.1) + + theme_bw() + + scale_color_manual(values=c(celltype_colours, subtype_cols), na.value='#e6e6e6', drop=TRUE, limits=unique(tab_dt$cell_type)) + + labs(y='confidence score', x= '', colour='cell type') + + theme(legend.position='none', axis.text.x=element_text(angle = 90, hjust = 1)) + + coord_flip() + +fig_list[['Supp10A']] +``` + + +## Supp10B: cell-type contact map +```{r Supp10B, fig.height=20, fig.width=20, message=FALSE, cache=FALSE, eval=TRUE} + + +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_map.Rds')) +){ + sce_query$dummy = 'X' + + type_tb = table(sce_query$cell_type) + type_tb %<>% .[type_tb < 50] %>% names + sce_query %<>% .[,!sce_query$cell_type %in% type_tb] + g_out = map_2d[['all']][sce_query$cell_id,] %>% + get_delaunay(plot=FALSE) %>% + .$graph + + + ccc_map = cellCellContact( + sce = sce_query, + group = 'cell_type', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID ='cell_id') + + obj_list[['ccc_map']] = ccc_map +}else{ + ccc_map = readRDS(file.path(obj_dir, 'ccc_map.Rds')) +} + + +ccc_map = cellCellContactHeatmapTriangle(ccc_map$pmat, c(celltype_colours, subtype_cols, 'other' = '#e6e6e6'), title='') + +ccc_map +``` + + + + + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/Fig4A_D.rmd b/analysis/Fig4A_D.rmd new file mode 100644 index 0000000..17a4887 --- /dev/null +++ b/analysis/Fig4A_D.rmd @@ -0,0 +1,544 @@ +--- +title: "Developing human heart - Spatial Transcriptomics, Figure 3A-D" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/Fig4A_D.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +# library('pika') +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +# library('anndata') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'ST_human_heart' +ref_tag = 'ST' +data_dir = file.path('data_tidy', tag) +out_dir = file.path('output', tag) +obj_dir = 'int_data/ST_human_heart' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/ST_human_heart' +dir.create(fig_dir) +fig_list = list() +data_f = list.files(data_dir) +exp_names = str_replace(data_f[grep('h5ad$', data_f)], '.h5ad', '') +exp_names = setdiff(exp_names[!grepl('_lo', exp_names)], c('ST', 'scRNAseq', 'query')) +names(exp_names) = exp_names +sce_f = exp_names %>% + map(~file.path(data_dir, paste0(.x, '.h5ad'))) +methods = list.files(out_dir) +names(methods) = methods +preds_f = methods %>% + map(function(y) exp_names %>% + map(~file.path(out_dir, y, paste0(paste('preds', paste0(.x, '_lo'), .x, sep='_'), '.h5ad'))) + ) + +imp_f = file.path(obj_dir, 'imp_ST.txt') +col_dt_f = file.path(obj_dir, 'col_dt_ST.txt') +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +n_markers = 10 +overwrite = FALSE +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +imp = imp_f %>% fread %>% setnames('V1', 'GENE') %>% .[-1,] %>% + .[!grepl('^MT', GENE)] +genes = unlist(imp[,1]) +imp %<>% .[,-1] %>% as.matrix +rownames(imp) = genes +imp_dt = apply(imp, 2, function(x) rownames(imp)[order(-x)]) %>% + as.data.table +imp_dt %<>% .[1:n_markers,] +markers = imp_dt %>% unlist %>% unique +sce_list = sce_f %>% map(readH5AD) +s_sce = sce_list %>% purrr::reduce(cbind) +meta_dt = colData(s_sce) %>% as.data.table +rm(sce_list) + +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + # Gene importanc + + preds_list = list() + preds_list[['sagenet']] = preds_f[['sagenet']] %>% map(~anndata::read_h5ad(.x)$obsm$dist_map %>% as('sparseMatrix')) + ## Novosparc predictions + preds_list[['novosparc']] = preds_f[['novosparc']] %>% map(read_preds_novosparc) + gc() + # Tangram predictions + preds_list[['tangram_cells']] = preds_f[['tangram_cells']] %>% map(read_preds_tangram) + gc() + + +gc() +} + +col_dt = col_dt_f %>% fread %>% + .[, class__ := factor(..class__code[class__], ordered=TRUE, levels=class__ord)] + +grid_code = c( + "6" = 'top - left', + "7" = 'top - middle', + "8" = 'top - right', + "3" = 'middle - left', + "4" = 'middle - middle', + "5" = 'middle - right', + "0" = 'bottom - left', + "1" = 'bottom - middle', + "2" = 'bottom - right' +) +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + map_2d[['tangram_cells']] = preds_list[['tangram_cells']] + map_2d[['true']] = as.matrix(meta_dt[, .(x,y)]) + rownames(map_2d[['true']]) = meta_dt$cell_id + map_2d[['novosparc']] = preds_list[['novosparc']] + + + # sagenet distances + d[['sagenet']] = preds_list[['sagenet']] %>% map(as.dist) + d_mtx[['sagenet']] = d[['sagenet']] %>% map(as.matrix) + d[['tangram_cells']] = preds_list[['tangram_cells']] %>% map(dist) + d_mtx[['tangram_cells']] = d[['tangram_cells']] %>% map(as.matrix) + d[['novosparc']] = preds_list[['novosparc']] %>% map(dist) + d_mtx[['novosparc']] = d[['novosparc']] %>% map(as.matrix) + + for(i in exp_names){ + rownames(d_mtx[['sagenet']][[i]]) = colnames(d_mtx[['sagenet']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + map_2d[['sagenet']][[i]] = Rtsne(d_mtx[['sagenet']][[i]], is_distance=T)$Y + rownames(map_2d[['sagenet']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + + d[['exprs']][[i]] = dist(t(assays(s_sce[, s_sce$sample == i])[[1]])) %>% as.dist + d_mtx[['exprs']][[i]] = d[['exprs']][[i]] %>% as.matrix + rownames(d_mtx[['exprs']][[i]]) = colnames(d_mtx[['exprs']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + map_2d[['exprs']][[i]] = Rtsne(d_mtx[['exprs']][[i]], is_distance=T)$Y + rownames(map_2d[['exprs']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + + d[['sagenet_markers']][[i]] = dist(t(assays(s_sce[markers, s_sce$sample == i])[[1]])) %>% as.dist + d_mtx[['sagenet_markers']][[i]] = d[['sagenet_markers']][[i]] %>% as.matrix + rownames(d_mtx[['sagenet_markers']][[i]]) = colnames(d_mtx[['sagenet_markers']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + map_2d[['sagenet_markers']][[i]] = Rtsne(d_mtx[['sagenet_markers']][[i]], is_distance=T)$Y + rownames(map_2d[['sagenet_markers']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + + rownames(d_mtx[['tangram_cells']][[i]]) = colnames(d_mtx[['tangram_cells']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + rownames(map_2d[['tangram_cells']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + + + rownames(d_mtx[['novosparc']][[i]]) = colnames(d_mtx[['novosparc']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + rownames(map_2d[['novosparc']][[i]]) = colnames(s_sce[, s_sce$sample == i]) + + } + + # true physical distances + d_true = dist(meta_dt[, .(x,y)]) + d_mtx_true = d_true %>% as.matrix + rownames(d_mtx_true) = colnames(d_mtx_true) = meta_dt$cell_id + + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_dt + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_dt = readRDS(file.path(obj_dir, 'meta_dt.Rds')) +} +``` + + +## Correlation between the true distance matrix and the prediction distance matrix +```{r calc_cor, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'cor_list.Rds')) +){ + cor_list = list() + for(meth in setdiff(names(map_2d), 'true')){ + + # for(meth in 'sagenet_markers'){ + cor_list[[meth]] = c() + for(e in levels(factor(meta_dt$sample))){ + cells = meta_dt[sample == e]$cell_id + cor_list[[meth]][e] = cor(c(d_mtx_true[cells, cells]), c(d_mtx[[meth]][[e]]), method='spearman') + } + } + + obj_list[['cor_list']] = cor_list + gc() +}else{ + cor_list = readRDS(file.path(obj_dir, 'cor_list.Rds')) +} + +cor_dt = names(cor_list) %>% + map(~data.table(method=.x, sample=names(cor_list[[.x]]), corr=cor_list[[.x]])) %>% + purrr::reduce(rbind) +``` + +## Cell2Cell afinity scores +```{r calc_c2c, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_list.Rds')) +){ + ccc_list = list() + # True physical space + ccc_list[['true']] = list() + for(e in levels(factor(meta_dt$sample))){ + + cells = meta_dt[sample == e]$cell_id + g_out = map_2d[['true']] %>% + .[cells, ] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[['true']][[e]] = cellCellContact( + sce = s_sce[, cells], + group = 'class__', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID='cell_id') + } + + # Predicted spaces + + for(meth in setdiff(names(map_2d), 'true')){ + # for(meth in 'sagenet_markers'){ + ccc_list[[meth]] = list() + for(e in levels(factor(meta_dt$sample))){ + cells = meta_dt[sample == e]$cell_id + rownames(map_2d[[meth]][[e]]) = cells + g_out = map_2d[[meth]][[e]] %>% + get_delaunay(plot=FALSE) %>% + .$graph + ccc_list[[meth]][[e]] = cellCellContact( + sce = s_sce[, cells], + group = 'class__', + graph = g_out, + nperm = 500, + plot = FALSE, + cellID ='cell_id') + } + } + + obj_list[['ccc_list']] = ccc_list +}else{ + ccc_list = readRDS(file.path(obj_dir, 'ccc_list.Rds')) +} +ccc_dist = list() +for(meth in setdiff(names(map_2d), 'true')){ + ccc_dist[[meth]] = c() + for(e in levels(factor(meta_dt$sample))){ + m1 = ccc_list[[meth]][[e]]$pmat + m2 = ccc_list[['true']][[e]]$pmat + ccc_dist[[meth]][e] = norm(m1 - m2 , '2') + } +} + +# ccc_plot_list = names(ccc_list) %>% +# map(~ccc_list[[.x]][3] %>% map(function(y) cellCellContactHeatmapTriangle(y, celltype_colours, title=.x))) + +# Scores' data table +ccc_dt = names(ccc_dist) %>% + map(~names(ccc_dist[[.x]]) %>% map(function(y) + data.table(method=.x, sample=y, corr=ccc_dist[[.x]][y]) + ) %>% purrr::reduce(rbind) + ) %>% + purrr::reduce(rbind) + + +``` + + + +## Distribution of the lisi scores +```{r calc_lisi, message=FALSE, cache=FALSE} +preds_list = list() +if(overwrite | + !file.exists(file.path(obj_dir, 'lisi_vals.Rds')) +){ + preds_list[['sagenet']] = map_2d[['sagenet']] + preds_list[['exprs']] = map_2d[['exprs']] + preds_list[['sagenet_markers']] = map_2d[['sagenet_markers']] + preds_list[['true']] = names(map_2d[['sagenet']]) %>% map(~meta_dt[sample==.x, .(x,y)]) + names(preds_list[['true']]) = names(map_2d[['sagenet']]) + preds_list[['novosparc']] = map_2d[['novosparc']] + preds_list[['tangram_cells']] = map_2d[['tangram_cells']] + # lisi_list = list() + lisi_vals = names(preds_list) %>% + map(function(y) names(preds_list[[y]]) %>% + map(~compute_lisi(preds_list[[y]][[.x]], meta_dt[sample==.x], c("class__")) %>% + as.data.table %>% + .[, `:=`(cell_id = ..meta_dt[sample==.x]$cell_id, method = y, sample = .x)]) + ) %>% map(~.x %>% purrr::reduce(rbind)) %>% purrr::reduce(rbind) + + obj_list[['lisi_vals']] = lisi_vals +}else{ + lisi_vals = readRDS(file.path(obj_dir, 'lisi_vals.Rds')) +} + +lisi_vals = dcast(lisi_vals, cell_id ~ method, value.var='LISI' ) %>% + .[, (setdiff(names(.), 'cell_id')) := lapply(.SD, function(x) x/.SD$true), .SDcols = setdiff(names(.), 'cell_id')] %>% + melt(variable.name='method', value.name='LISI') %>% + .[method != 'true'] +methods = setdiff(unique(lisi_vals$method), 'true') +names(methods) = methods +lisi_vals %<>% setkey(cell_id) +lisi_vals = lisi_vals[order(cell_id)] %>% + .[meta_dt[, .(cell_id, class__, sample)]] +``` + +## 4A: True regional labels + grids + reconstruction (ref) +```{r 4A, fig.height=24, fig.width=10, message=FALSE, cache=FALSE} +meta_dt[, class__ := factor(..class__code[class__], ordered=TRUE, levels=class__ord)] +grid2_cols = .palette1[1:length(unique(col_dt$grid_2))] +names(grid2_cols) = as.factor(as.character(unique(col_dt$grid_2))) +grid3_cols = .palette1[1:length(unique(col_dt$grid_3))] +names(grid3_cols) = as.factor(as.character(unique(col_dt$grid_3))) +grid4_cols = .palette1[1:length(unique(col_dt$grid_4))] +names(grid4_cols) = as.factor(as.character(unique(col_dt$grid_4))) +for(i in 1:length(setdiff(exp_names, 'query'))){ + c_dt = col_dt[sample == exp_names[i]] %>% .[order(as.character(class__))] + if(i == 1){ + p = plot_spatial(c_dt[, .(x, y)], labels=factor(as.character(c_dt$class__)), hide_legend = TRUE, title='region', label_cols=class__cols, sz=2, label_title='region') + coord_fixed() + labs(y=exp_names[i]) + theme(text = element_text(size = 20)) + + legend_region = (plot_spatial(c_dt[, .(x, y)], labels=factor(as.character(c_dt$class__)), hide_legend = FALSE, title='region', label_cols=class__cols, sz=5, label_title='region') + coord_fixed() + labs(y=exp_names[i]) + theme(text = element_text(size = 20))) %>% get_legend() %>% as_ggplot() + + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_2), hide_legend = TRUE, title='2 * 2', sz=2, label_cols=grid2_cols) + coord_fixed() + theme(text = element_text(size = 20))) + + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_3), hide_legend = TRUE, title='3 * 3', sz=2, label_cols=grid3_cols) + coord_fixed() + theme(text = element_text(size = 20))) + + legend_grid = (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_3), hide_legend = FALSE, title='3 * 3', sz=5, label_cols=grid3_cols, label_title = 'partition in 3*3 grid') + coord_fixed() + theme(text = element_text(size = 20)) + scale_color_manual(values=grid3_cols, na.value='#e6e6e6', drop=TRUE, limits=names(grid_code), labels=grid_code[names(grid3_cols)])) %>% get_legend() %>% as_ggplot() + + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_4), hide_legend = TRUE, title='4 * 4', sz=2, label_cols=grid4_cols) + coord_fixed() + theme(text = element_text(size = 20))) + + p = p + (plot_2d( + dim_df = map_2d[['sagenet']][[exp_names[i]]][c_dt$cell_id, ], + labels = factor(as.character(c_dt$class__)), + hide_legend = TRUE, + title = 'SageNet', + sz = 2, + label_cols=class__cols + ) + theme(text = element_text(size = 20)) ) + + } + else{ + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(as.character(c_dt$class__)), hide_legend = TRUE, title='', label_cols=class__cols, sz=2) + coord_fixed() + labs(y=exp_names[i])) + theme(text = element_text(size = 20)) + + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_2), hide_legend = TRUE, title='', sz=2, label_cols=grid2_cols) + coord_fixed()) + + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_3), hide_legend = TRUE, title='', sz=2, label_cols=grid3_cols) + coord_fixed()) + + p = p + (plot_spatial(c_dt[, .(x, y)], labels=factor(c_dt$grid_4), hide_legend = TRUE, title='', sz=2, label_cols=grid4_cols) + coord_fixed()) + + p = p + (plot_2d( + dim_df = map_2d[['sagenet']][[exp_names[i]]][c_dt$cell_id, ], + labels = factor(as.character(c_dt$class__)), + hide_legend = TRUE, + title = '', + sz = 2, + label_cols=class__cols + ) ) + } +} + +fig_list[['4A']] = p + plot_layout(ncol= 5, guides = 'collect') +fig_list[['4A']] + +legend_region + legend_grid + plot_layout(ncol= 1) +``` + +## 4B-D: Benchmark plots; dist. correlation, lisi scores, and cell2cell affinity dist. +```{r 4B_D, fig.height=8, fig.width=31, message=FALSE, cache=FALSE} +fig_list[['4B']] = cor_dt[method != 'sagenet_markers'] %>% + ggplot + + aes(x = sample, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "Spearman's rank correlation", x = 'query dataset') + + theme_bw() + + theme(text = element_text(size = 30)) + + theme(axis.text.x=element_text(angle = 45, hjust = 1)) + + +fig_list[['4C']] = ccc_dt[method != 'True Space' & method != 'sagenet_markers'] %>% + ggplot + + aes(x = sample, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + # geom_bar(stat="identity", position=position_dodge()) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "cell-type affinity distance", x = 'query dataset') + + theme_bw() + + theme(legend.position='none', + text = element_text(size = 30), + axis.text.x=element_text(angle = 45, hjust = 1)) + +# lisi_dists = round(lisi_dists,digits=2) +fig_list[['4D']] = lisi_vals[method != 'sagenet_markers'] %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=unique(lisi_vals$method)) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x= 'method') + + scale_color_manual(values=method_cols, na.value='#e6e6e6') + + theme(text = element_text(size = 30), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=method_code) + +fig_list[['4B-D']] = + fig_list[['4B']] + + plot_spacer() + + fig_list[['4C']] + + plot_spacer() + + fig_list[['4D']] + + plot_layout(ncol = 5, widths=c(9, 1, 9, 1, 8), guides='collect') +fig_list[['4B-D']] +``` + +## Supp12: Benchmark plots; dist. correlation, lisi scores, and cell2cell affinity dist. +```{r Supp12, fig.height=8, fig.width=31, message=FALSE, cache=FALSE} +fig_list[['Supp12A']] = cor_dt %>% + ggplot + + aes(x = sample, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "Spearman's rank correlation", x = 'query dataset') + + theme_bw() + + theme(text = element_text(size = 30)) + + theme(axis.text.x=element_text(angle = 45, hjust = 1)) + + +fig_list[['Supp12B']] = ccc_dt[method != 'True Space'] %>% + ggplot + + aes(x = sample, y = corr, group = method, + color=factor(method, ordered=TRUE, levels=method_ord)) + + # geom_bar(stat="identity", position=position_dodge()) + + geom_line(size=3) + + geom_point(size=4) + + scale_color_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=setdiff(method_ord, c('True Space')), + labels=method_code[!names(method_code) %in% c('True Space')]) + + labs(color='method', y = "cell-type affinity distance", x = 'query dataset') + + theme_bw() + + theme(legend.position='none', + text = element_text(size = 30), + axis.text.x=element_text(angle = 45, hjust = 1)) + +# lisi_dists = round(lisi_dists,digits=2) +fig_list[['Supp12C']] = lisi_vals %>% + ggplot + + aes(x = factor(method, ordered=TRUE, levels=method_ord), y = log2(LISI), + fill=factor(method, ordered=TRUE, levels=method_ord)) + + geom_boxplot(color='#959595') + + theme_bw() + + scale_fill_manual(values=method_cols, na.value='#e6e6e6', drop=TRUE, + limits=unique(lisi_vals$method)) + + labs(y='log-scaled LISI (cell type)', fill = 'method', x= 'method') + + scale_color_manual(values=method_cols, na.value='#e6e6e6') + + theme(text = element_text(size = 30), + legend.position='none', + axis.text.x=element_text(angle = 45, hjust = 1)) + + geom_hline(yintercept=0, linetype="dashed", color = "black", size=2) + + scale_x_discrete(labels=method_code) + +fig_list[['Supp12']] = + fig_list[['Supp12A']] + + plot_spacer() + + fig_list[['Supp12B']] + + plot_spacer() + + fig_list[['Supp12C']] + + plot_layout(ncol = 5, widths=c(9, 1, 9, 1, 8), guides='collect') +fig_list[['Supp12']] +``` + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/Fig4E_Supp12.rmd b/analysis/Fig4E_Supp12.rmd new file mode 100644 index 0000000..2878c59 --- /dev/null +++ b/analysis/Fig4E_Supp12.rmd @@ -0,0 +1,361 @@ +--- +title: "Developing human heart - Spatial Transcriptomics; Figure 4E, Supp. Fig. 12" +author: +- name: Elyas Heidari + affiliation: + - &IMLS Institute for Molecular Life Sciences, University of Zurich, Switzerland + - Swiss Institute of Bioinformatics (SIB), University of Zurich, Switzerland +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + workflowr::wflow_html: + code_folding: hide + toc: true + toc_float: true + number_sections: false +--- + +# Setup / definitions + + +```{r setup_knitr, include=FALSE} +library('BiocStyle') +set.seed(1996) +options(bitmapType='cairo') +knitr::opts_chunk$set(autodep=TRUE, cache=FALSE, dev='pdf', cache.lazy = FALSE) +# knitr::opts_chunk$set( autodep=TRUE, cache=TRUE, cache.lazy=FALSE, dev='png' ) +knitr::opts_knit$set( root.dir='..' ) +# wflow_build(files='analysis/Fig4E_Supp12.rmd', view=F, verbose=T, delete_cache=T) +``` + +```{r setup_libs, collapse=FALSE, message=FALSE, warning=FALSE, cache=FALSE} +``` + +## Libraries + +```{r libs, message=FALSE, warning=FALSE, cache=FALSE} +# library('pika') +library('tidyverse') +library('igraph') +library('data.table') +library('SingleCellExperiment') +library('magrittr') +library('BiocParallel') +library('patchwork') +library('limma') +library('cccd') +library('philentropy') +library('cowplot') +library('zellkonverter') +library('stringr') +library('lisi') +library('ggpubr') +library('Rtsne') +# library('anndata') +``` + + +## Helper functions +```{r setup_helpers, message=FALSE, cache=FALSE} +source('code/utils/utils.R') +source('code/utils/SMA_functions.R') +``` + +## Directories +```{r setup_input, message=FALSE, cache=FALSE} +tag = 'ST_human_heart' +ref_tag = 'ST_all' +data_dir = file.path('data_tidy', tag) +out_dir = file.path('output', tag) +obj_dir = 'int_data/ST_human_heart/ST' +dir.create(obj_dir) +obj_list = list() +fig_dir = 'figures/ST_human_heart/ST' +dir.create(fig_dir) +fig_list = list() +data_f = list.files(data_dir) +exp_names = 'ST' +names(exp_names) = exp_names +sce_f = exp_names %>% + map(~file.path(data_dir, paste0(.x, '.h5ad'))) +methods = 'sagenet' +names(methods) = methods +preds_f = methods %>% + map(function(y) exp_names %>% + map(~file.path(out_dir, y, paste0(paste('preds', 'ST_all', .x, sep='_'), '.h5ad'))) + ) + +imp_f = 'int_data/ST_human_heart/imp_ST.txt' +adj_f = 'models/ST_human_heart/ST_all/ST_all_grid_2.h5ad' + +``` + + + + + +## Params +```{r setup_outputs, message=FALSE, cache=FALSE} +overwrite = FALSE +n_markers = 10 +``` + +# Load inputs +```{r load_inputs, message=FALSE, cache=FALSE} +sce_list = 'output/ST_human_heart/sagenet/preds_ST_all_ST.h5ad' %>% map(readH5AD) +s_sce = sce_list %>% purrr::reduce(cbind) +meta_dt = colData(s_sce) %>% as.data.table +colnames(s_sce) = meta_dt$cell_id +rm(sce_list) +gene_names = rownames(s_sce) +adj = assays(readH5AD(adj_f))[[1]] +rownames(adj) = colnames(adj) = gene_names +adj = adj[rowSums(adj) != 0, rowSums(adj) != 0] + +meta_dt$ent = get_confidence(s_sce) / 3 +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + # Gene importanc + + preds_list = list() + preds_list[['sagenet']] = anndata::read_h5ad('output/ST_human_heart/sagenet/preds_ST_all_ST.h5ad')$obsm$dist_map + + +gc() +} +imp = imp_f %>% fread %>% setnames('V1', 'GENE') +genes = imp$GENE +imp %<>% .[,-1] %>% as.matrix +rownames(imp) = genes +imp_abs = abs(imp) +# imp = apply(imp, 2, function(x) (x)/(max(x))) +imp_dt = apply(imp, 2, function(x) rownames(imp)[order(-x)[1:n_markers]]) %>% + as.data.table +markers = imp_dt %>% unlist %>% setdiff(c()) +print(markers) + +meta_dt %<>% + .[, class__ := factor(..class__code[class__], ordered=TRUE, levels=class__ord)] +s_sce$class__ = meta_dt$class__ + +col_dt_f = file.path('int_data/ST_human_heart/col_dt_ST.txt') +col_dt = col_dt_f %>% fread %>% + .[, class__ := factor(..class__code[class__], ordered=TRUE, levels=class__ord)] %>% + setkey(cell_id) %>% + .[order(sample)] +grid2_cols = .palette1[1:length(unique(col_dt$grid_2))] +names(grid2_cols) = as.factor(as.character(unique(col_dt$grid_2))) +grid3_cols = .palette1[1:length(unique(col_dt$grid_3))] +names(grid3_cols) = as.factor(as.character(unique(col_dt$grid_3))) +grid4_cols = .palette1[1:length(unique(col_dt$grid_4))] +names(grid4_cols) = as.factor(as.character(unique(col_dt$grid_4))) +col_dt %>% setkey(cell_id) +``` + +# Processing / calculations +## Distance matrices and embeddings +```{r calc_dists, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'map_2d.Rds')) | + !file.exists(file.path(obj_dir, 'meta_dt.Rds')) +){ + d = list() + d_mtx = list() + map_2d = list() + + + # sagenet distances + # d[['sagenet']] = preds_list[['sagenet']] %>% map(as.dist) + d_mtx[['sagenet']] = preds_list[['sagenet']] + + # for(i in exp_names){ + rownames(d_mtx[['sagenet']]) = colnames(d_mtx[['sagenet']]) = colnames(s_sce) + map_2d[['sagenet']] = Rtsne(d_mtx[['sagenet']], is_distance=T)$Y + rownames(map_2d[['sagenet']]) = colnames(s_sce) + meta_dt[, `:=`(sagenet_all_1 = ..map_2d[['sagenet']][, 1], + sagenet_all_2 = ..map_2d[['sagenet']][, 2])] + + + expr_pcs = prcomp(assays(s_sce)[[1]])$rotation[,1:10] + d[['exprs']] = dist(expr_pcs) %>% as.dist + d_mtx[['exprs']] = d[['exprs']] %>% as.matrix + rownames(d_mtx[['exprs']]) = colnames(d_mtx[['exprs']]) = colnames(s_sce) + # map_2d[['exprs']] = uwot::umap(d[['exprs']], n_neighbors=50) + map_2d[['exprs']] = Rtsne(d_mtx[['exprs']], is_distance=T, perplexity=50)$Y + rownames(map_2d[['exprs']]) = colnames(s_sce) + meta_dt[, `:=`(expr_tsne_1 = ..map_2d[['exprs']][, 1], + expr_tsne_2 = ..map_2d[['exprs']][, 2])] + + # } + + # obj_list[['d_mtx']] = append(d_mtx, d_mtx_true) + obj_list[['map_2d']] = map_2d + obj_list[['meta_dt']] = meta_dt + gc() +}else{ + map_2d = readRDS(file.path(obj_dir, 'map_2d.Rds')) + meta_query = readRDS(file.path(obj_dir, 'meta_dt.Rds')) +} +``` + + + +## 4E: UMAP plosts of expression space and the reconstructed space +```{r 4E, fig.height=4, fig.width=12, message=FALSE, cache=FALSE, eval=TRUE} +# meta_dt %<>% .[, .SD[sample(.N)]] +meta_dt %<>% + .[, class__ := factor(class__, ordered=TRUE, levels=class__ord)] %>% + .[order(class__)] +meta_dt$ent = 1 - meta_dt$ent/(max(meta_dt$ent)) + +sagenet_p = plot_2d( + dim_df = -map_2d[['sagenet']][meta_dt$cell_id,], + labels = meta_dt$class__, + label_cols = class__cols, + hide_legend = TRUE, + sz = 2.5, + alpha = meta_dt$ent, + title = 'region' +) + labs(x = '', y = '') + +sagenet_batch_p = plot_2d( + dim_df = -map_2d[['sagenet']][meta_dt$cell_id,], + labels = meta_dt$sample, + label_cols = .palette3, + hide_legend = FALSE, + sz = 2.5, + alpha = meta_dt$ent, + title = 'sample (query dataset)', + label_title = 'sample' +) + labs(x = '', y = '') + +sagenet_grid_p = plot_2d( + dim_df = -map_2d[['sagenet']][meta_dt$cell_id,], + labels = factor(col_dt[meta_dt$cell_id]$grid_3), + label_cols = grid3_cols, + hide_legend = TRUE, + sz = 2.5, + alpha = meta_dt$ent, + title = '3 * 3 grid' +) + labs(x = '', y = '') + +fig_list[['4E']] = + sagenet_p + + sagenet_grid_p + + sagenet_batch_p + + plot_layout(ncol=3, guides='collect') +fig_list[['4E']] + +exprs_p = plot_2d( + dim_df = -map_2d[['exprs']][meta_dt$cell_id,], + labels = meta_dt$class__, + label_cols = class__cols, + hide_legend = TRUE, + sz = 2.5, + alpha = meta_dt$ent, + title = 'region' +) + labs(x = '', y = '') + +exprs_batch_p = plot_2d( + dim_df = -map_2d[['exprs']][meta_dt$cell_id,], + labels = meta_dt$sample, + label_cols = .palette3, + hide_legend = FALSE, + sz = 2.5, + alpha = meta_dt$ent, + title = 'sample (query dataset)', + label_title = 'sample' +) + labs(x = '', y = '') + +exprs_grid_p = plot_2d( + dim_df = -map_2d[['exprs']][meta_dt$cell_id,], + labels = factor(col_dt[meta_dt$cell_id]$grid_3, ordered=TRUE), + label_cols = grid3_cols, + hide_legend = FALSE, + sz = 2.5, + alpha = meta_dt$ent, + title = '3 * 3 grid' +) + labs(x = '', y = '') + +fig_list[['4EX']] = + exprs_p + + exprs_grid_p + + exprs_batch_p + + plot_layout(ncol=3, guides='collect') +fig_list[['4EX']] +``` + +## Supp12: GGM +```{r Supp12, fig.height=10, fig.width=10, message=FALSE, cache=FALSE} +g_obj = graph_from_adjacency_matrix(adj, mode='undirected') +d = cluster_louvain(g_obj) +grp = .palette2[membership(d)] +lay = layout_nicely(g_obj) +graph_col_comm(graph=g_obj, lay=lay, grp=grp, title='Gene interaction network on ST dataset', labels=rownames(adj)) +graph_col_comm(graph=g_obj, lay=lay, grp=ifelse(rownames(adj) %in% markers, grp, '#e6e6e6'), title='Spatially informative genes (SIG)', labels=ifelse(rownames(adj) %in% markers, rownames(adj), '')) +``` + + +## Supp13A: +```{r Supp13A, fig.height=6, fig.width=5, message=FALSE, cache=FALSE} +tab_dt= copy(meta_dt)[, .(ent=median(ent), .N), by = 'class__'] %>% .[order(ent), ] +type_ord = unique(tab_dt$class__) +data_summary <- function(x) { + m <- median(x) + ymin <- m-sd(x) + ymax <- m+sd(x) + return(c(y=m,ymin=ymin,ymax=ymax)) +} + +fig_list[['Supp13A']] = meta_dt %>% + ggplot + + aes(x = factor(class__, ordered=TRUE, levels= type_ord), y = ent, color=class__) + + theme_bw() + + scale_color_manual(values=class__cols, na.value='#e6e6e6', drop=TRUE) + + labs(y='confidence score', x= '') + + stat_summary(fun.data=data_summary, size=1.5) + + coord_flip() + + theme(legend.position='none') +fig_list[['Supp13A']] +``` + + +## Supp13B: Cell2Cell afinity scores +```{r Supp13B, fig.height=10, fig.width=10, message=FALSE, cache=FALSE} +if(overwrite | + !file.exists(file.path(obj_dir, 'ccc_map.Rds')) +){ + + g_out = map_2d[['sagenet']] %>% + get_delaunay(plot=FALSE) %>% + .$graph + + ccc_map = cellCellContact( + sce = s_sce, + group = 'class__', + graph = g_out, + nperm = 1000, + plot = FALSE, + cellID ='cell_id') + + obj_list[['ccc_map']] = ccc_map +}else{ + ccc_map = readRDS(file.path(obj_dir, 'ccc_map.Rds')) +} +pmat = ccc_map$pmat +rownames(pmat) = colnames(pmat) = gsub(' $', '', colnames(pmat)) +fig_list[['Supp13B']] = cellCellContactHeatmapTriangle(pmat, class__cols, title='') + +fig_list[['Supp13B']] +``` + + + + + +# Save objects & plots +```{r save, fig.height=12, fig.width=12, message=FALSE, cache=FALSE} +names(obj_list) %>% map(~saveRDS(obj_list[[.x]], file.path(obj_dir, paste0(.x, '.Rds')))) +names(fig_list) %>% map(~saveRDS(fig_list[[.x]], file.path(fig_dir, paste0(.x, '.Rds')))) +``` diff --git a/analysis/MGA_data_embryo1_2.rmd b/analysis/MGA_data_embryo1_2.rmd index 828d17c..f443991 100644 --- a/analysis/MGA_data_embryo1_2.rmd +++ b/analysis/MGA_data_embryo1_2.rmd @@ -1,5 +1,5 @@ --- -title: "Figure 2: Spatial Mouse Atlas - Seqfish - Mouse Embryo" +title: "Spatial Mouse Atlas - Seqfish - Mouse Embryo, Figure 2, Supp. Fig. 1-7" author: - name: Elyas Heidari affiliation: @@ -106,13 +106,10 @@ rownames(adj) = colnames(adj) = gene_names adj = adj[rowSums(adj) != 0, rowSums(adj) != 0] - if(overwrite | !file.exists(file.path(obj_dir, 'map_2d.Rds')) | !file.exists(file.path(obj_dir, 'meta_dt.Rds')) ){ - # Gene importanc - preds_list = list() preds_list[['sagenet']] = anndata::read_h5ad(preds_f[['sagenet']])$obsm$dist_map adj = anndata::read_h5ad(preds_f[['sagenet']])$varm$adj @@ -351,12 +348,6 @@ lisi_vals %<>% setkey(cell_id) lisi_vals = lisi_vals[order(cell_id)] %>% # setnames('cell_type', 'LISI') %>% .[meta_dt[, .(cell_id, cell_type, embryo)]] -# norm_lisi = norm(as.matrix(lisi_vals[method == 'True Space']$LISI)) %>% sqrt -# lisi_vals[] -# lisi_dists = methods %>% -# map(~dist(rbind(lisi_vals[method == .x]$LISI, lisi_vals[method == 'True Space']$LISI))/norm_lisi, na.rm=T) %>% -# map(c) %>% -# unlist ``` @@ -375,8 +366,8 @@ fig_list[['A']] # plist ``` -## B: UMAP plosts of expression space and the reconstructed space -```{r B_new, fig.height=10, fig.width=15, message=FALSE, cache=FALSE, eval=TRUE} +## C: UMAP plosts of expression space and the reconstructed space +```{r C_new, fig.height=10, fig.width=15, message=FALSE, cache=FALSE, eval=TRUE} meta_dt %<>% .[, .SD[sample(.N)]] meta_dt[, ent := (ent_embryo1_2_leiden_1 + ent_embryo1_2_leiden_0.1 + ent_embryo1_2_leiden_0.01 + ent_embryo1_2_leiden_0.5 + ent_embryo1_2_leiden_0.05)/5 ] meta_dt$ent = meta_dt$ent_plot = (1 - meta_dt$ent/(max(meta_dt$ent)))^2 @@ -390,16 +381,80 @@ exprs_p = plot_2d( ) + labs(x = '', y = '') -# lgened = (plot_2d( -# dim_df = meta_dt[, .(expr_tsne_1, expr_tsne_2)], -# labels = meta_dt$cell_type, -# label_cols = celltype_colours, -# hide_legend = FALSE, -# label_title = 'cell type', -# title = 'Expression', -# sz = 2.5 -# ) + labs(x = '', y = '')+ theme(text = element_text(size = 10)) ) %>% get_legend() %>% -# as_ggplot() +markers_p = plot_2d( + dim_df = meta_dt[, .(markers_tsne_1, markers_tsne_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet Markers', + sz = 2.5 +) + labs(x = '', y = '') + +sagenet_p = plot_2d( + dim_df = meta_dt[, .(sagenet_1, sagenet_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'SageNet', + sz = 2.5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '') + +lgened1 = (plot_2d( + dim_df = meta_dt[order(cell_type), .(sagenet_1, sagenet_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = FALSE, + title = 'SageNet', + sz = 5, + alpha = meta_dt$ent_plot, +) + labs(x = '', y = '', color = 'cell type')+ theme(text = element_text(size = 15)) ) %>% get_legend() %>% + as_ggplot() + +lgened2 = ( + ggplot(meta_dt) + + aes(x = 1, y = 1, alpha=ent_plot)+ + geom_point(size=5) + + labs(alpha='confidence score')) + theme(text = element_text(size = 15)) ) %>% get_legend() %>% + as_ggplot() + +novosparc_p = plot_2d( + dim_df = meta_dt[, .(novosparc_1, novosparc_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'novosparc', + sz = 2.5 +) + labs(x = '', y = '') + +tangram_p = plot_2d( + dim_df = meta_dt[, .(tangram_1, tangram_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Tangram', + sz = 2.5 +) + labs(x = '', y = '') + +fig_list[['B']] = exprs_p + markers_p + sagenet_p + novosparc_p + tangram_p + lgened + + plot_layout(ncol = 3) +fig_list[['B']] + +``` + +## BX: UMAP plosts of expression space and the reconstructed space +```{r B_X, fig.height=10, fig.width=15, message=FALSE, cache=FALSE, eval=TRUE} +meta_dt %<>% .[, .SD[sample(.N)]] +meta_dt[, ent := (ent_embryo1_2_leiden_1 + ent_embryo1_2_leiden_0.1 + ent_embryo1_2_leiden_0.01 + ent_embryo1_2_leiden_0.5 + ent_embryo1_2_leiden_0.05)/5 ] +meta_dt$ent = meta_dt$ent_plot = (1 - meta_dt$ent/(max(meta_dt$ent)))^2 +exprs_p = plot_2d( + dim_df = meta_dt[, .(expr_tsne_1, expr_tsne_2)], + labels = meta_dt$cell_type, + label_cols = celltype_colours, + hide_legend = TRUE, + title = 'Expression', + sz = 2.5, +) + labs(x = '', y = '') markers_p = plot_2d( @@ -421,15 +476,22 @@ sagenet_p = plot_2d( alpha = meta_dt$ent_plot, ) + labs(x = '', y = '') -lgened = (plot_2d( - dim_df = meta_dt[, .(sagenet_1, sagenet_2)], +lgened1 = (plot_2d( + dim_df = meta_dt[order(cell_type), .(sagenet_1, sagenet_2)], labels = meta_dt$cell_type, label_cols = celltype_colours, hide_legend = FALSE, title = 'SageNet', - sz = 2.5, + sz = 5, alpha = meta_dt$ent_plot, -) + labs(x = '', y = '', color = 'cell type')+ theme(text = element_text(size = 10)) ) %>% get_legend() %>% +) + labs(x = '', y = '', color = 'cell type')+ theme(text = element_text(size = 15)) ) %>% get_legend() %>% + as_ggplot() + +lgened2 = ( + ggplot(meta_dt) + + aes(x = 1, y = 1, alpha=ent_plot)+ + geom_point(size=5) + + labs(alpha='confidence score')) + theme(text = element_text(size = 15)) ) %>% get_legend() %>% as_ggplot() novosparc_p = plot_2d( @@ -456,9 +518,10 @@ fig_list[['B']] ``` + ```{r XN, fig.height=4, fig.width=12, message=FALSE, cache=FALSE, eval=TRUE} -types = copy(meta_dt)[!(cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm')), cell_type := '0']$cell_type +types = copy(meta_dt)[!(cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm')), cell_type := 'other']$cell_type ord = order(-as.numeric(types)) types %<>% .[ord] @@ -496,7 +559,7 @@ lgened = (plot_2d( as_ggplot() -p = plot_spatial(col_dt[, .(x, -y)], labels=factor(ifelse(col_dt$cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm'), col_dt$cell_type, NA)), label_cols=celltype_colours, hide_legend = TRUE, title = 'cell types', sz=1) +coord_fixed() +p = plot_spatial(col_dt[, .(x, -y)], labels=factor(ifelse(col_dt$cell_type %in% c('Gut tube', 'Splanchnic mesoderm', 'Lateral plate mesoderm'), col_dt$cell_type, NA)), label_cols=celltype_colours, hide_legend = TRUE, title = 'embryo1_2', sz=1) +coord_fixed() fig_list[['XN']] = exprs_p + sagenet_p + p + lgened + plot_layout(ncol = 4) @@ -545,7 +608,7 @@ for(m in markers){ ```{r XN3, fig.height=8, fig.width=10, message=FALSE, cache=FALSE, eval=TRUE} -EC_ord = c('Cardiac Endothelium', 'nonCardiac Endothelium') +EC_ord = c('Endocardium', 'other Endothelium') EC_cols = c("#F90026", "#8DB5CE") names(EC_cols) = EC_ord @@ -561,8 +624,8 @@ EC_false_ind = EC_ind[colSums(d_sub < quantile(d_sub, 0.05)) == 0] assays(sce)[['logcounts']] = assays(sce)[[1]] fm = scran::findMarkers(sce[, c(EC_true_ind, EC_false_ind)], groups=c(rep('TT', length(EC_true_ind)), rep('FF', length(EC_false_ind)))) %>% .$TT fm$gene_id = rownames(fm) -fm$diff_exp = ifelse(fm$Top > 5, NA, ifelse(fm$logFC.FF > 0, 'Cardiac Endothelium', 'nonCardiac Endothelium')) -m_dt = meta_dt[cell_id %in% EC_ind, .(n_MC = ifelse(cell_id %in% ..EC_true_ind, 'Cardiac Endothelium', 'nonCardiac Endothelium'), sagenet_1, sagenet_2, x, y, ent_plot)] +fm$diff_exp = ifelse(fm$Top > 5, NA, ifelse(fm$logFC.FF > 0, 'Endocardium', 'other Endothelium')) +m_dt = meta_dt[cell_id %in% EC_ind, .(n_MC = ifelse(cell_id %in% ..EC_true_ind, 'Endocardium', 'other Endothelium'), sagenet_1, sagenet_2, x, y, ent_plot)] EC_CMC_2d = plot_2d( @@ -604,7 +667,7 @@ EC_volcano = data.frame(fm) %>% theme_minimal() + theme(legend.position='none') + geom_text() + - labs(x = 'logFC (Cardiac End. - nonCardiac End.)') + + labs(x = 'logFC (Endocardium - other End.)') + xlim(c(-1.5, 1.5)) diff --git a/analysis/figure/MGA_data_multi_all.rmd/B-1.pdf b/analysis/figure/MGA_data_multi_all.rmd/B-1.pdf deleted file mode 100644 index f79d33b..0000000 Binary files a/analysis/figure/MGA_data_multi_all.rmd/B-1.pdf and /dev/null differ diff --git a/analysis/figure/MGA_data_multi_all.rmd/XB-1.pdf b/analysis/figure/MGA_data_multi_all.rmd/XB-1.pdf deleted file mode 100644 index f46a774..0000000 Binary files a/analysis/figure/MGA_data_multi_all.rmd/XB-1.pdf and /dev/null differ diff --git a/analysis/figure/MGA_data_multi_all.rmd/XD-1.pdf b/analysis/figure/MGA_data_multi_all.rmd/XD-1.pdf deleted file mode 100644 index 3da7e34..0000000 Binary files a/analysis/figure/MGA_data_multi_all.rmd/XD-1.pdf and /dev/null differ diff --git a/analysis/figure/MGA_data_multi_all.rmd/calc_lisi-1.pdf b/analysis/figure/MGA_data_multi_all.rmd/calc_lisi-1.pdf deleted file mode 100644 index 1cf6e9f..0000000 Binary files a/analysis/figure/MGA_data_multi_all.rmd/calc_lisi-1.pdf and /dev/null differ diff --git a/code/experiments/run_sagenet.sh b/code/experiments/run_sagenet.sh index c2ee0bf..da158d5 100755 --- a/code/experiments/run_sagenet.sh +++ b/code/experiments/run_sagenet.sh @@ -23,7 +23,7 @@ for tag_ref in ST_all; do - for tag_query in ST; do + for tag_query in query; do bsub -o .logs/sagenet/ST_human_heart -q gpu -gpu "num=1:gmem=10000" -M 4000 -R rusage[mem=3000] \ "python3 code/experiments/run_sagenet.py \ -i data_tidy --tag ST_human_heart \ diff --git a/code/sagenet/prep_query_DHH.py b/code/sagenet/prep_query_DHH.py new file mode 100644 index 0000000..1d2ea11 --- /dev/null +++ b/code/sagenet/prep_query_DHH.py @@ -0,0 +1,16 @@ +import sagenet as sg +import scanpy as sc +import squidpy as sq +import anndata as ad +import random +import numpy as np + +rna_ad = sc.read('data_tidy/ST_human_heart/scRNAseq.h5ad') +rna_ad.obs = rna_ad.obs[['cell_id']].assign(class__ = rna_ad.obs.celltype, sample = rna_ad.obs.experiment, dataset='scRNAseq') + +st_ad = sc.read('data_tidy/ST_human_heart/ST.h5ad') +st_ad.obs = st_ad.obs[['cell_id', 'class__', 'sample']].assign(dataset='ST', class__ = str(st_ad.obs.class__)) + +query_ad = st_ad.concatenate(rna_ad) + +query_ad.write('data_tidy/ST_human_heart/query.h5ad') diff --git a/code/utils/SMA_functions.R b/code/utils/SMA_functions.R index 2c3752b..359044f 100644 --- a/code/utils/SMA_functions.R +++ b/code/utils/SMA_functions.R @@ -407,7 +407,7 @@ getRandomConnectivity = function(sce, option = "observed", x_name = "x_global_affine", y_name = "y_global_affine", - splitgroups = c("embryo"), + splitgroups = c("dummy"), chunksize = 10000) { ######## new function with more functionality # option either observed or random @@ -464,7 +464,7 @@ getRandomConnectivity_dep = function(sce, option = "observed", x_name = "x_global_affine", y_name = "y_global_affine", - splitgroups = c("embryo")) { + splitgroups = c("dummy")) { ##### deprecated function!!! # option either observed or random # sce singlecellexperiment @@ -764,9 +764,9 @@ cellCellContactHeatmapTriangle <- function( width = factor*unit(ncol(mat2), "cm"), height = factor*unit(ncol(mat2), "cm"), - column_title = title, - column_title_gp = gpar(fontsize = 20, fontface = "bold"), - row_title = NULL, + row_title = title, + row_title_gp = gpar(fontsize = 30, fontface = "bold"), + column_title = NULL, na_col = NA, @@ -790,7 +790,7 @@ cellCellContactHeatmapTriangle <- function( cell_fun = function(j, i, x, y, w, h, col) { if (j == i) { grid.rect(x, y, w, h, gp = gpar(fill = col, col = "grey")) - grid.text(rownames(mat2)[i], x + factor*unit(0.5, "cm"), y + factor*unit(0.5, "cm"), rot = 45, hjust = 0, gp = gpar(size=20)) + grid.text(rownames(mat2)[i], x + factor*unit(0.5, "cm"), y + factor*unit(0.5, "cm"), rot = 45, hjust = 0, gp = gpar(fontsize=20)) } else if (which(od == i) < which(od == j)) { # grid.rect(x, y, w, h, gp = gpar(fill = NULL, col = NA)) grid.rect(x, y, w, h, gp = gpar(fill = col, col = NA)) diff --git a/code/utils/utils.R b/code/utils/utils.R index d6edaee..223ae53 100644 --- a/code/utils/utils.R +++ b/code/utils/utils.R @@ -109,6 +109,23 @@ embryo_ord = c('scRNAseq', 'embryo1_2', 'embryo1_5', 'embryo2_2', 'embryo2_5', embryo_cols = c("#DC050C", "#8600bf", "#ba5ce3", "#E7298A", "#E78AC3", "#1965B0", "#7BAFDE") names(embryo_cols) = embryo_ord +embryo_code = c( + 'scRNAseq' = 'scRNAseq', + 'embryo1_2' = 'E1_L1', + 'embryo1_5' = 'E1_L2', + 'embryo2_2' = 'E2_L1', + 'embryo2_5' = 'E2_L2', + 'embryo3_2' = 'E3_L1', + 'embryo3_5' = 'E3_L2') + +method_code = c( + 'True Space' = 'True Space', + 'sagenet' = 'SageNet', + 'tangram_cells' = 'Tangram', + 'novosparc' = 'novoSpaRc', + 'sagenet_markers' = "SageNet-SIG", + 'exprs' = 'Expression') + class__code = c( 'Compact ventricular myocardium', 'Trabecular ventricular myocardium (1)', @@ -268,7 +285,7 @@ plot_spatial <- function(dim_df, labels, label_cols=c(.palette1, .palette2, .pal ggplot + aes(dim1, dim2, color=label) + geom_point(size=sz) + - theme_void() + + theme_minimal() + theme(axis.text= element_blank(), axis.ticks.x=element_blank(), axis.ticks.y=element_blank(), @@ -282,13 +299,13 @@ plot_spatial <- function(dim_df, labels, label_cols=c(.palette1, .palette2, .pal dim_plot } -plot_2d <- function(dim_df, labels, label_cols=c(.palette1, .palette2, .palette3), title='', label_title='label', hide_legend=TRUE, sz=1, alpha=1){ +plot_2d <- function(dim_df, labels, label_cols=c(.palette1, .palette2, .palette3), title='', label_title='label', hide_legend=TRUE, sz=1, alpha=1, shape=16){ dim_dt = data.table(label=labels, dim1=unlist(dim_df[,1]), dim2=unlist(dim_df[,2]), alpha=alpha) dim_plot = dim_dt %>% ggplot + aes(dim1, dim2, color=label) + - geom_point(size=sz, alpha=alpha) + + geom_point(size=sz, alpha=alpha, shape=shape) + theme_minimal() + theme(axis.text= element_blank(), axis.ticks.x=element_blank(), diff --git a/figures/ST_human_heart/scRNAseq/B.Rds b/figures/ST_human_heart/scRNAseq/B.Rds index aab4ca8..e490db1 100644 Binary files a/figures/ST_human_heart/scRNAseq/B.Rds and b/figures/ST_human_heart/scRNAseq/B.Rds differ diff --git a/figures/seqfish_mouse_embryo/embryo1_2/B.Rds b/figures/seqfish_mouse_embryo/embryo1_2/B.Rds index 97afd28..5a2e3c0 100644 Binary files a/figures/seqfish_mouse_embryo/embryo1_2/B.Rds and b/figures/seqfish_mouse_embryo/embryo1_2/B.Rds differ diff --git a/figures/seqfish_mouse_embryo/embryo1_2/XC.Rds b/figures/seqfish_mouse_embryo/embryo1_2/XC.Rds index 43c6e6c..0368112 100644 Binary files a/figures/seqfish_mouse_embryo/embryo1_2/XC.Rds and b/figures/seqfish_mouse_embryo/embryo1_2/XC.Rds differ