Skip to content

Commit

Permalink
Add Vision analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
Zoë Steier committed Jun 26, 2023
1 parent 2fe5b32 commit a20f26b
Show file tree
Hide file tree
Showing 2 changed files with 343 additions and 0 deletions.
160 changes: 160 additions & 0 deletions Vision/Vision_AllData.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
---
title: "Run Vision on totalVI outputs for full thymus CITE-seq data set"
output: html_notebook
---

Zoë Steier

Run Vision on totalVI outputs for full thymus CITE-seq data set.

# Load required packages
```{r Install and load required packages}
# Install packages
# library(devtools)
# install_version("Matrix", version = "1.5.1")
# install_github("YosefLab/[email protected]")
# Load packages
library(VISION)
library(tidyverse)
sessionInfo()
```

# Load data
## Load totalVI data and metadata

```{r Load data as csvs saved from anndata}
totalvi_path <- "/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/totalVI_thymus/thymus111_allbatches_stable/totalVI_results/"
latent <- read_csv(gzfile(str_c(totalvi_path, "latent.csv.gz")))
umap_totalVI <- read_csv(gzfile(str_c(totalvi_path, "umap.csv.gz")))
denoised_proteins <- read_csv(gzfile(str_c(totalvi_path, "denoised_proteins.csv.gz")))
raw_proteins <- read_csv(gzfile(str_c(totalvi_path, "raw_proteins.csv.gz")))
totalVI_genes <- read_csv(gzfile(str_c(totalvi_path, "totalVI_genes.csv.gz")))
totalVI_proteins <- read_csv(gzfile(str_c(totalvi_path, "totalVI_proteins.csv.gz")))
raw_genes <- read_csv(gzfile(str_c(totalvi_path, "raw_genes.csv.gz")))
obs <- read_csv(gzfile(str_c(totalvi_path, "obs.csv.gz")))
annotations_path <- "/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/Annotation_thymus/"
annotations <- read_csv(gzfile(str_c(annotations_path, "annotations.csv.gz")))
```

```{r Load sample metadata}
meta_csv <- read_csv("/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/metadata/Metadata_SeqStats_totalSeq_experiments.csv")
meta_thymus <- meta_csv %>%
filter(Tissue == "thymus") %>%
filter(TotalSeq_panel == "ADT111")# %>% # 19 batches in total
meta_thymus <- meta_thymus %>%
mutate(Batch = seq(1, dim(meta_thymus)[1]) - 1) # rename batches starting at 0
meta_thymus
```

## Process data for Vision

```{r Process totalVI data}
# totalVI umap
totalvi_umap <- as.data.frame(umap_totalVI[, -1]) # Remove first column of dataframes - this is an index column coming from python
colnames(totalvi_umap) <- c("UMAP1", "UMAP2")
row.names(totalvi_umap) <- obs$X1
# totalVI latent space
totalvi_latent <- as.matrix(latent[, -1])
row.names(totalvi_latent) <- obs$X1
# totalVI denoised proteins. Use in proteinData in Vision that can be viewed in biaxial plots like FACS.
denoised_proteins_df <- log1p(as.data.frame(denoised_proteins[-1])) # take log of denoised protein data for in silico FACS
row.names(denoised_proteins_df) <- obs$X1
colnames(denoised_proteins_df) <- totalVI_proteins[, 2][[1]]
# Raw proteins
raw_proteins_df <- log1p(as.data.frame(raw_proteins[, -1])) # Take log of protein data
row.names(raw_proteins_df) <- obs$X1
# Rename raw columns to start with raw
totalVI_proteins <- totalVI_proteins %>%
mutate(raw_name = str_c("raw_", totalVI_proteins$`0`))
colnames(raw_proteins_df) <- totalVI_proteins$raw_name # set raw protein names
# Add raw proteins to the denoised proteins so they can both be visualized
all_proteins <- cbind(denoised_proteins_df, raw_proteins_df)
# Raw gene expression data
raw_genes_df <- as.data.frame(raw_genes[, -1])
row.names(raw_genes_df) <- obs$X1
colnames(raw_genes_df) <- totalVI_genes[, 2][[1]]
dim(all_proteins) # cells x proteins
```

```{r Make metadata for Vision}
# totalVI metadata and sample metadata
totalVI_meta <- obs %>%
select(Batch = 'batch_indices', UMIs_RNA = "n_counts", UMIs_protein = "n_protein_counts",
'n_genes', 'n_proteins', 'percent_mito', contains("leiden_totalVI_")) %>%
mutate_at(vars(contains("leiden_totalVI_")), as_factor) %>% # Convert cluster labels to factors
left_join(meta_thymus %>% select(Sample, Genotype, Experiment, Replicate, Location, Batch), by = "Batch") %>% # Join with experimental metadata
mutate(Mouse = str_c(Genotype, "_", Replicate)) %>% # Add mouse information
mutate_at(c("Sample", "Batch", "Mouse", "Experiment", "Genotype", "Location"), as_factor) %>% # Set as factors to preserve the order of levels
select(-c("Replicate", "Batch", "Sample", "Experiment"))
# Add lineage information to metadata
totalVI_meta <- totalVI_meta %>%
mutate(Annotation = as.factor(annotations$labels)) %>%
mutate(Lineage_by_genotype = as.factor(case_when(Genotype == "B6" ~ "WT", # Assign each cell to a lineage
Genotype %in% c("AND", "OT2", "B2M") ~ "CD4",
Genotype %in% c("F5", "OT1", "MHC2") ~ "CD8"))) %>%
select("Lineage_by_genotype", "Genotype", "Mouse", "Location", "Annotation", everything())
# Make metadata dataframe
totalVI_metadata <- as.data.frame(totalVI_meta)
row.names(totalVI_metadata) <- obs$X1 # Add the cell barcodes as row names
totalVI_metadata[1:5, ]
```

# Run Vision

```{r Get signatures}
signatures <- c(
"/data/yosef2/users/david.detomaso/Signatures/MSIGDB/H_Hallmark.gmt",
"/data/yosef2/users/david.detomaso/Signatures/Yoseflab/netPath.gmt",
"/data/yosef2/users/david.detomaso/Signatures/Yoseflab/signatures_NY_private.gmt"
)
```

```{r Make RNA matrix}
# Make RNA matrix
n.umi = median(colSums(t(raw_genes_df)))
expr = apply(t(raw_genes_df), 2, function(x) (x * n.umi) / (sum(x) + 1))
expr[1:5, 1:5]
# Set variable genes for Vision
projection_genes <- row.names(expr)
```

```{r Run Vision}
# Create Vision object
vis <- Vision(expr[,],
signatures = signatures,
meta = totalVI_metadata, #
projection_genes = projection_genes,
proteinData = all_proteins[,],
projection_methods = c("UMAP"),
latentSpace = totalvi_latent[,],
name = "Thymus CITE-seq all data")
vis = addProjection(vis, name = "UMAP_totalVI", coordinates = totalvi_umap[,1:2]) # Add totalVI UMAP
saveRDS(vis, "/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/Vision_thymus/NI_alldata_annotated/vis_object_unanalyzed.rds")
# Run in script on multiple cores.
# options(mc.cores = 10) # set the number of cores
# vis <- analyze(vis)
# saveRDS(vis, 'vision_object_analyzed.rds') # save results
# View results
# vis <- readRDS('vision_object_analyzed.rds')
# vis <- viewResults(vis, host='0.0.0.0', port=9002, browser = FALSE)
```
183 changes: 183 additions & 0 deletions Vision/Vision_PositiveSelection.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
---
title: "Run Vision on totalVI outputs for positive selection subset"
output: html_notebook
---

Zoë Steier

Run Vision on totalVI outputs for positive selection subset.

# Load required packages
```{r Install and load required packages}
# Install packages
# library(devtools)
# install_version("Matrix", version = "1.5.1")
# install_github("YosefLab/[email protected]")
# Load packages
library(VISION)
library(tidyverse)
sessionInfo()
```
# Load data
## Load totalVI data and metadata

```{r Load data as csvs saved from anndata}
totalvi_path <- "/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/totalVI_thymus/thymus111_allbatches_stable_posselecting/totalVI_results/"
latent <- read_csv(gzfile(str_c(totalvi_path, "latent.csv.gz")))
umap_totalVI <- read_csv(gzfile(str_c(totalvi_path, "umap.csv.gz")))
denoised_proteins <- read_csv(gzfile(str_c(totalvi_path, "denoised_proteins.csv.gz")))
raw_proteins <- read_csv(gzfile(str_c(totalvi_path, "raw_proteins.csv.gz")))
totalVI_genes <- read_csv(gzfile(str_c(totalvi_path, "totalVI_genes.csv.gz")))
totalVI_proteins <- read_csv(gzfile(str_c(totalvi_path, "totalVI_proteins.csv.gz")))
raw_genes <- read_csv(gzfile(str_c(totalvi_path, "raw_genes.csv.gz")))
obs <- read_csv(gzfile(str_c(totalvi_path, "obs.csv.gz")))
```

```{r Load sample metadata}
meta_csv <- read_csv("/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/metadata/Metadata_SeqStats_totalSeq_experiments.csv")
meta_thymus <- meta_csv %>%
filter(Tissue == "thymus") %>%
filter(TotalSeq_panel == "ADT111") # 19 batches in total
meta_thymus <- meta_thymus %>%
mutate(Batch = seq(1, dim(meta_thymus)[1]) - 1) # rename batches starting at 0
meta_thymus
```

```{r Load pseudotime data}
time_bins_small <- read_csv("/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/Slingshot_thymus/pseudotime_slingshot_binned_20210202.csv")
time_bins_small <- time_bins_small %>%
filter(!(is.na(mean_pseudotime))) %>% # Remove cells with no Slingshot information. Later, merge by barcode.
mutate(Lineage = case_when(Genotype == "B6" ~ "WT", # Assign each cell to a lineage
Genotype %in% c("AND", "OT2", "B2M") & Lineage_by_genotypeSlingshot != "unassigned" ~ "CD4",
Genotype %in% c("F5", "OT1", "MHC2") & Lineage_by_genotypeSlingshot != "unassigned" ~ "CD8",
Genotype %in% c("F5", "OT1", "MHC2") & Lineage_by_genotypeSlingshot == "unassigned" ~ "wrong_to_CD4",
Genotype %in% c("AND", "OT2", "B2M") & Lineage_by_genotypeSlingshot == "unassigned" ~ "wrong_to_CD8")) %>%
mutate(Pseudotime_bin = as_factor(Pseudotime_bin)) # Rename time bins as integers to match totalVI, change to factor
time_bins_small
```

## Process data for Vision

```{r Process totalVI data}
# totalVI UMAP
totalvi_umap <- as.data.frame(umap_totalVI[, -1]) # Remove first column of dataframes - this is an index column coming from python
colnames(totalvi_umap) <- c("UMAP1", "UMAP2")
row.names(totalvi_umap) <- obs$X1
totalvi_umap <- totalvi_umap[time_bins$Barcode, ]
# totalVI latent space
totalvi_latent <- as.matrix(latent[, -1])
row.names(totalvi_latent) <- obs$X1
totalvi_latent <- totalvi_latent[time_bins$Barcode, ]
# totalVI denoised proteins. Use in proteinData in Vision that can be viewed in biaxial plots like FACS.
denoised_proteins_df <- log1p(as.data.frame(denoised_proteins[-1])) # take log of denoised protein data for in silico FACS
row.names(denoised_proteins_df) <- obs$X1
colnames(denoised_proteins_df) <- totalVI_proteins[, 2][[1]]
denoised_proteins_df <- denoised_proteins_df[time_bins$Barcode, ]
# Raw gene expression data
raw_genes_df <- as.data.frame(raw_genes[, -1])
row.names(raw_genes_df) <- obs$X1
colnames(raw_genes_df) <- totalVI_genes[, 2][[1]]
raw_genes_df <- raw_genes_df[time_bins$Barcode, ]
# Raw proteins
raw_proteins_df <- log1p(as.data.frame(raw_proteins[, -1])) # Take log of protein data
row.names(raw_proteins_df) <- obs$X1
raw_proteins_df <- raw_proteins_df[time_bins$Barcode, ]
# Rename raw columns to start with raw
totalVI_proteins <- totalVI_proteins %>%
mutate(raw_name = str_c("raw_", totalVI_proteins$`0`))
colnames(raw_proteins_df) <- totalVI_proteins$raw_name
# Add raw proteins to the denoised proteins so they can both be visualized
all_proteins <- cbind(denoised_proteins_df, raw_proteins_df)
dim(all_proteins) # cells x proteins
```

```{r Read in silico gate data}
gated_denoised <- read_csv("/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/Slingshot_thymus/gated_denoised_data_20210330.csv")
gates <- gated_denoised %>%
select(Barcode, summary_gates_v4)
gates$summary_gates_v4 <- factor(gates$summary_gates_v4, levels = c("DP1", "DP2", "CD4+CD8low", "DP3", "SemimatureCD4", "SemimatureCD8", "MatureCD4", "MatureCD8"))
table(gates$summary_gates_v4)
```

```{r Make metadata for Vision}
# totalVI metadata and sample metadata
totalVI_meta <- obs %>%
rename("Barcode" = "X1") %>%
right_join(time_bins_small %>% select(Barcode, mean_pseudotime, Lineage, Pseudotime_bin), by = "Barcode") %>% # Filter to positive selection cells used for pseudotime
left_join(gates, by = "Barcode") %>%
select(Batch = 'batch_indices', UMIs_RNA = "n_counts", UMIs_protein = "n_protein_counts",
'n_genes', 'n_proteins', 'percent_mito', Pseudotime = "mean_pseudotime", "Pseudotime_bin", contains("leiden_totalVI_"), "Lineage", in_silico_gates = "summary_gates_v4") %>%
mutate_at(vars(contains("leiden_totalVI_")), as_factor) %>% # Convert cluster labels to factors
left_join(meta_thymus %>% select(Genotype, Replicate, Location, Batch), by = "Batch") %>% # Join with experimental metadata
mutate(Mouse = str_c(Genotype, "_", Replicate)) %>% # Add mouse information
mutate_at(c("Batch", "Mouse", "Genotype", "Location"), as_factor) %>% # Set as factors to preserve the order of levels
select(-c("Replicate", "Batch")) %>%
select("Lineage", "Genotype", "Mouse", "Location", "in_silico_gates", "Pseudotime", "Pseudotime_bin", everything()) # Add slingshot metadata
totalVI_meta$Lineage <- factor(totalVI_meta$Lineage, levels = c("CD4", "CD8", "WT", "wrong_to_CD4", "wrong_to_CD8"))
table(totalVI_meta$Lineage)
# Make metadata dataframe
totalVI_metadata <- as.data.frame(totalVI_meta)
row.names(totalVI_metadata) <- time_bins_small$Barcode #obs$X1 # Add the cell barcodes as row names
totalVI_metadata[1:5, ]
```

# Run Vision

```{r Get signatures}
signatures <- c(
"/data/yosef2/users/david.detomaso/Signatures/MSIGDB/H_Hallmark.gmt",
"/data/yosef2/users/david.detomaso/Signatures/Yoseflab/netPath.gmt",
"/data/yosef2/users/david.detomaso/Signatures/Yoseflab/signatures_NY_private.gmt"
)
```

```{r Make RNA matrix}
# Make RNA matrix
n.umi = median(colSums(t(raw_genes_df)))
expr = apply(t(raw_genes_df), 2, function(x) (x * n.umi) / (sum(x) + 1))
# Set variable genes for Vision
projection_genes <- row.names(expr)
```

```{r Run Vision}
# Create Vision object
vis <- Vision(expr[,],
signatures = signatures,
meta = totalVI_metadata[,],
projection_genes = projection_genes,
proteinData = all_proteins[,],
projection_methods = c("UMAP"),
latentSpace = totalvi_latent[,],
name = "Thymus CITE-seq positive selection")
vis = addProjection(vis, name = "UMAP totalVI", coordinates = totalvi_umap[,1:2]) # Add totalVI UMAP
saveRDS(vis, "/data/yosef2/users/zsteier/TotalSeq/20190814_BioLegend_ZRS08/analysis/Vision_thymus/NI_posselecting_slingshot/vis_object_unanalyzed.rds")
# Run in script on multiple cores.
# options(mc.cores = 10) # set the number of cores
# vis <- analyze(vis)
# saveRDS(vis, 'vision_object_analyzed.rds') # save results
# View results
# vis <- readRDS('vision_object_analyzed.rds')
# vis <- viewResults(vis, host='0.0.0.0', port=9001, browser = FALSE)
```

0 comments on commit a20f26b

Please sign in to comment.