-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscVelo_dataset.Rmd
140 lines (117 loc) · 5.32 KB
/
scVelo_dataset.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
---
title: "GeneTrajectory example (scVelo dataset)"
author: "Rihao Qu"
date: "9/20/2023"
output:
rmarkdown::html_document:
toc: true
toc_depth: 2
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
#knitr::opts_chunk$set(fig.width=5, fig.height=4.5)
```
# Preparation
```{r, warning = FALSE, message=FALSE}
##### Load required R libraries
library(Seurat)
require(scales)
require(ggplot2)
require(viridis)
require(dplyr)
require(GeneTrajectory)
require(Matrix)
require(plot3D)
```
# Loading example data
```{r, warning = FALSE, fig.width=5.5, fig.height=4.5}
##### Import the example SMC data and take a preliminary visualization
data_S <- readRDS("/banach1/rq25/GeneTrajectory_data/scVelo_tutorial_data_S.rds")
DimPlot(data_S, group.by = "clusters", shuffle = T)
```
################################################
# Gene-gene distance computation
################################################
## Select genes
```{r, warning = FALSE}
assay <- "RNA"
DefaultAssay(data_S) <- assay
data_S <- FindVariableFeatures(data_S, nfeatures = 2000)
all_genes <- data_S@assays[[assay]]@var.features
expr_percent <- apply(as.matrix(data_S[[assay]]@data[all_genes, ]) > 0, 1, sum)/ncol(data_S)
genes <- all_genes[which(expr_percent > 0.01 & expr_percent < 0.5)]
length(genes)
```
## Prepare the input for gene-gene Wasserstein distance computation
```{r, warning = FALSE}
data_S <- GeneTrajectory::RunDM(data_S)
cell.graph.dist <- GetGraphDistance(data_S, K = 10, dims = 1:10)
cg_output <- CoarseGrain(data_S, cell.graph.dist, genes, N = 1000)
```
## Prepare the input for gene-gene Wasserstein distance computation
```{r, warning = FALSE}
#####Save the files for computation in Python
dir.path <- "/banach1/rq25/tmp/scVelo_example2/"
dir.create(dir.path, recursive = T)
setwd(dir.path)
write.table(cg_output[["features"]], paste0(dir.path, "gene_names.csv"), row.names = F, col.names = F, sep = ",")
write.table(cg_output[["graph.dist"]], paste0(dir.path, "ot_cost.csv"), row.names = F, col.names = F, sep = ",")
Matrix::writeMM(Matrix(cg_output[["gene.expression"]], sparse = T), paste0(dir.path, "gene_expression.mtx"))
```
################################################
# Wasserstein distance computation in Python
################################################
```{r, warning = FALSE, fig.width=5, fig.height=4.5}
# Make sure to install the latest version of POT module (python), using the following:
# pip install -U https://github.com/PythonOT/POT/archive/master.zip
# Run the following command to get the gene-gene Wasserstein distance matrix
#system(sprintf("nohup /data/rihao/anaconda3/bin/python /data/rihao/GeneTrajectory/GeneTrajectory/python/gene_distance_cal_parallel.py %s &", dir.path))
```
################################################
# Gene trajectory inference and visualization
################################################
```{r, warning = FALSE, fig.width=4.5, fig.height=4}
dir.path <- "/banach1/rq25/GeneTrajectory_data/more_biological_examples/scvelo_example/DM_DIM10_K10/"
gene.dist.mat <- LoadGeneDistMat(dir.path, file_name = "emd.csv")#[genes, genes]
dim(gene.dist.mat)
gene_embedding <- GetGeneEmbedding(gene.dist.mat, K = 5)$diffu.emb
gene_trajectory <- ExtractGeneTrajectory(gene_embedding, gene.dist.mat, N = 3, t.list = c(21,3,5), K = 5)
table(gene_trajectory$selected)
par(mar = c(1.5,1.5,1.5,1.5))
scatter3D(gene_embedding[,1],
gene_embedding[,2],
gene_embedding[,3],
bty = "b2", colvar = as.integer(as.factor(gene_trajectory$selected))-1,
main = "trajectory", pch = 19, cex = 1, theta = 45, phi = 0,
col = ramp.col(c("lightgray", hue_pal()(3))))
scatter3D(gene_embedding[,1],
gene_embedding[,2],
gene_embedding[,4],
bty = "b2", colvar = as.integer(as.factor(gene_trajectory$selected))-1,
main = "trajectory", pch = 19, cex = 1, theta = 45, phi = 0,
col = ramp.col(c("lightgray", hue_pal()(3))))
scatter3D(gene_embedding[,2],
gene_embedding[,3],
gene_embedding[,4],
bty = "b2", colvar = as.integer(as.factor(gene_trajectory$selected))-1,
main = "trajectory", pch = 19, cex = 1, theta = 45, phi = 0,
col = ramp.col(c("lightgray", hue_pal()(3))))
gene_list <- list()
for (i in 1:3){
#message(paste0("Trajectory-", i))
gene_trajectory_sub <- gene_trajectory[which(gene_trajectory$selected == paste0("Trajectory-", i)),]
genes <- rownames(gene_trajectory_sub)[order(gene_trajectory_sub[, paste0("Pseudoorder", i)])]
#message(paste(genes, collapse = ", "))
gene_list[[i]] <- genes
}
```
# Visualize gene bin plots
```{r, warning = FALSE, fig.width=16, fig.height=3}
data_S <- AddGeneBinScore(data_S, gene_trajectory, N.bin = 7, trajectories = 1:3, assay = "RNA", reverse = c(F, F, F))
FeaturePlot(data_S, pt.size = 0.05, features = paste0("Trajectory",1,"_genes", 1:7), ncol = 7, order = T) &
scale_color_gradientn(colors = rev(brewer_pal(palette = "RdYlBu")(10))) & NoLegend() & NoAxes()
FeaturePlot(data_S, pt.size = 0.05, features = paste0("Trajectory",2,"_genes", 1:7), ncol = 7, order = F) &
scale_color_gradientn(colors = rev(brewer_pal(palette = "RdYlBu")(10))) & NoLegend() & NoAxes()
FeaturePlot(data_S, pt.size = 0.05, features = paste0("Trajectory",3,"_genes", 1:7), ncol = 7, order = T) &
scale_color_gradientn(colors = rev(brewer_pal(palette = "RdYlBu")(10))) & NoLegend() & NoAxes()
```