-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagarwal_nuclei.Rmd
105 lines (74 loc) · 2.79 KB
/
agarwal_nuclei.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
---
title: "Agarwal Nuclei Single Cell"
author: "Peter Kilfeather"
date: "11/06/2020"
output: html_document
---
```{r}
library(Seurat)
## Dataset for analysis
dataset_loc <- "/zfs/analysis/bm_rna/raw_matrices/samples"
ids <- list.dirs("raw_matrices/samples", full.names = FALSE)[-1]
d10x.data <- sapply(ids, function(i){
d10x <- Read10X(file.path(dataset_loc, i))
colnames(d10x) <- paste(sapply(strsplit(colnames(d10x),split="-"),'[[',1L),i,sep="-")
d10x
})
experiment.data <- do.call("cbind", d10x.data)
nuclei <- CreateSeuratObject(
experiment.data,
project = "Agarwal Nuclei",
min.cells = 3,
min.features = 500,
names.field = 2,
names.delim = "\\-")
nuclei
nuclei[["percent.mt"]] <- PercentageFeatureSet(nuclei, pattern = "^MT-")
nuclei[["percent.ribo"]] <- PercentageFeatureSet(nuclei, pattern = "^RP[SL]")
nuclei[["percent.mt"]]
VlnPlot(nuclei, features = c("percent.mt"), ncol = 1)
VlnPlot(nuclei, features = c("percent.ribo"), ncol = 1)
VlnPlot(nuclei, features = c("nFeature_RNA"), ncol = 1)
VlnPlot(nuclei, features = c("nCount_RNA"), ncol = 1)
# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(nuclei, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(nuclei, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
nuclei_subset <- subset(nuclei, subset = nFeature_RNA < 5000 & percent.mt < 5)
nuclei_subset
nuclei_subset <- NormalizeData(nuclei_subset, normalization.method = "LogNormalize", scale.factor = 10000)
```
```{r}
nuclei_subset <- FindVariableFeatures(nuclei_subset, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(nuclei_subset), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(nuclei_subset)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2
plot1
plot2
```
```{r}
all.genes <- rownames(nuclei_subset)
nuclei_subset <- ScaleData(nuclei_subset, features = all.genes)
nuclei_subset <- RunPCA(nuclei_subset, features = VariableFeatures(object = nuclei_subset))
DimPlot(nuclei_subset, reduction = "pca")
```
```{r}
DimHeatmap(nuclei_subset, dims = 1:3, cells = 500, balanced = TRUE)
```
```{r}
ElbowPlot(nuclei_subset)
```
```{r}
nuclei_subset <- FindNeighbors(nuclei_subset, dims = 1:20)
nuclei_subset <- FindClusters(nuclei_subset, resolution = 0.5)
# If you haven't installed UMAP, you can do so via reticulate::py_install(packages =
# 'umap-learn')
nuclei_subset <- RunUMAP(nuclei_subset, dims = 1:10)
# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
DimPlot(nuclei_subset, reduction = "umap")
```