author: Timothy Tickle and Brian Haas css: cegs_2017.css date: January 2017
class:small-code
Please follow along using the cut_and_paste.txt that comes with this repo.
If code is cut off here in the presentation, it will be available in full in the cut_and_paste.txt document.
class:small-code
- R allows methodology written by others to be imported.
- Leverage other code.
- Make your code available to others.
# Load libraries
library(dplyr) # Dataframe manipulation
library(MAST) # For DE
library(Matrix) # Sparse matrices
library(useful) # Corner function
library(vioplot) # Violin pots
library(Seurat) # Single cell Analysis
source("./src/dotplot.R")
source("./src/makeNiceMAST.R")
load("./data/bipolar_raw.Robj")
class:small-code
# 24904 x 44994 (1 minute)
bipolar.data <- Matrix(as.matrix(bipolar_dge), sparse=TRUE)
# Memory use as a normal matrix
object.size(bipolar_dge)
8971430520 bytes
# Memory use as a sparse matrix
# ~23 X less
object.size(bipolar.data)
393096408 bytes
bipolar_dge <- NULL
class:small-code
# Expected raw counts (non-normalized data)
# Can give log transformed data but do not transform in setup method
bipolar.seurat.raw <- new("seurat", raw.data=bipolar.data)
class:small-code
# Display the internal pieces of the Seurat Object
slotNames(bipolar.seurat.raw)
[1] "raw.data" "data" "scale.data"
[4] "var.genes" "is.expr" "ident"
[7] "pca.x" "pca.rot" "emp.pval"
[10] "kmeans.obj" "pca.obj" "gene.scores"
[13] "drop.coefs" "wt.matrix" "drop.wt.matrix"
[16] "trusted.genes" "drop.expr" "data.info"
[19] "project.name" "kmeans.gene" "kmeans.cell"
[22] "jackStraw.empP" "jackStraw.fakePC" "jackStraw.empP.full"
[25] "pca.x.full" "kmeans.col" "mean.var"
[28] "imputed" "mix.probs" "mix.param"
[31] "final.prob" "insitu.matrix" "tsne.rot"
[34] "ica.rot" "ica.x" "ica.obj"
[37] "cell.names" "cluster.tree" "snn.sparse"
[40] "snn.dense" "snn.k"
class:small-code
- So far, raw sparse matrix.
head(bipolar.seurat.raw@raw.data)
6 x 44994 sparse Matrix of class "dgCMatrix"
0610005C13Rik . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
0610007P14Rik . . . . 2 . . . 1 . . . . . . . . . . . . . . . . . . . . .
0610009B22Rik . 1 . . 1 . . . . . . 1 . . 1 . 1 2 . . . . . . . 1 . . . .
0610009E02Rik . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
0610009L18Rik . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
0610009O20Rik 2 . . . . . . . . . . . . . . . . . 1 . . . . 1 . . . . . .
0610005C13Rik . ......
0610007P14Rik . ......
0610009B22Rik . ......
0610009E02Rik . ......
0610009L18Rik . ......
0610009O20Rik . ......
.....suppressing columns in show(); maybe adjust 'options(max.print= *, width = *)'
..............................
class:small-code
- So far, raw sparse matrix.
head(bipolar.seurat.raw@data)
NULL
head(bipolar.seurat.raw@ident)
logical(0)
head(bipolar.seurat.raw@var.genes)
logical(0)
class:small-code
- var.genes Variable genes across cells.
- data.info Misc info including complexity (nGene).
- cell.names Column (cell) names.
- gene.names Row (gene) names.
?seurat
class:small-code
# Gene names (row names)
head(row.names(bipolar.seurat.raw@raw.data))
[1] "0610005C13Rik" "0610007P14Rik" "0610009B22Rik" "0610009E02Rik"
[5] "0610009L18Rik" "0610009O20Rik"
length(row.names(bipolar.seurat.raw@raw.data))
[1] 24904
class:small-code
# Column names # 24904 x 44994
# Sample / Cell names / Barcodes
head(colnames(bipolar.seurat.raw@raw.data),3)
[1] "Bipolar1_CCCACAAGACTA" "Bipolar1_TCGCCTCGTAAG" "Bipolar1_CAAAGCATTTGC"
length(colnames(bipolar.seurat.raw@raw.data))
[1] 44994
dim(bipolar.seurat.raw@raw.data)
[1] 24904 44994
class:small-code
# Only the corner
# The full data will be too large to see
corner(as.matrix(bipolar.seurat.raw@raw.data))
class:small-code
Bipolar1_CCCACAAGACTA Bipolar1_TCGCCTCGTAAG
0610005C13Rik 0 0
0610007P14Rik 0 0
0610009B22Rik 0 1
0610009E02Rik 0 0
0610009L18Rik 0 0
Bipolar1_CAAAGCATTTGC Bipolar1_CTTTTGATTGAC
0610005C13Rik 0 0
0610007P14Rik 0 0
0610009B22Rik 0 0
0610009E02Rik 0 0
0610009L18Rik 0 0
Bipolar1_GCTCCAATGACA
0610005C13Rik 0
0610007P14Rik 2
0610009B22Rik 1
0610009E02Rik 0
0610009L18Rik 0
class:small-code
# Plot genes per cell
# How many genes expressed per cells
# 3 Minutes
complexity.per.cell <- apply(bipolar.seurat.raw@raw.data,
2, function(x) sum(x>0))
# Mean count per cell.
# 3 minutes
mean.count.per.cell <- apply(bipolar.seurat.raw@raw.data,
2, function(x) mean(x))
# Gene prevalence
# 5 minutes
gene.prevalence <- apply(bipolar.seurat.raw@raw.data,
1, function(x) sum(x>0))
class:small-code
# Get batches based on cell names
technical_batches <- sapply(colnames(bipolar.seurat.raw@raw.data),
FUN=function(x){strsplit(x,"_",fixed=TRUE)[[1]][1]})
# Turn to numbers and add cell names to them
technical_batches <- as.numeric(as.factor(technical_batches))
names(technical_batches) <- colnames(bipolar.seurat.raw@raw.data)
class:small-code
# Plot genes per cell
# How many genes expressed per cell
# Violin plot
vioplot(complexity.per.cell)
# Add points
stripchart(complexity.per.cell, add=TRUE, vertical=TRUE, method="jitter", jitter=0.3, pch=20, col="#00000033")
# Add lines for reference
abline(h=500, col="orange")
abline(h=3000, col="green")
# Add text
title("Study Complexity")
axis(side=1,at=1,labels=c("Study"))
class:small-code
class:small-code
# Plot cell complexity, sorted by complexity
plot(1:length(complexity.per.cell),
sort(complexity.per.cell),
xlab="Cells ranked by complexity (Less to More)",
ylab="Complexity")
# Add lines
abline(h=500, col="orange")
abline(h=3000, col="green")
title("Complexity from Less to More")
class:small-code
class:small-code
# Plot genes per cell
# Complexity per technical batch
plot(complexity.per.cell ~ jitter(technical_batches,2))
# Add line
abline(h=500, col="orange")
abline(h=3000, col="green")
# Add text
title("Study Complexity")
axis(side=1,at=1,labels=c("Study"))
class:small-code
class:small-code
- Cells that are unusually simple (or no counts).
- Cells that are unusually complex (doublets?).
- We will filter in a standard way, we are just describing the data.
# Complexity by mean expression
plot(complexity.per.cell, mean.count.per.cell)
# Add lines
abline(v=500, col="orange")
abline(v=3000, col="green")
class:small-code
class:small-code
- People tend to filter very conservatively.
- Remember this is in log space.
# How often genes are seen in cells
hist(log2(gene.prevalence), breaks=100)
# add line
abline(v=log(30), col="orange")
class:small-code
class:small-code
- Genes must be in 30 cells.
- Cells must have alteast 500 genes.
- Scaled by 1000 (Total Sum Scaling).
# From 24904 x 44994 to 14575 x 30673
# 5 minutes
bipolar.seurat <- Setup(bipolar.seurat.raw,
min.cells=30, min.genes=500,
do.logNormalize=TRUE,
total.expr=1e4,
project="Tutorial")
[1] "Performing log-normalization"
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 10%
|
|======== | 13%
|
|========== | 16%
|
|============= | 19%
|
|=============== | 23%
|
|================= | 26%
|
|=================== | 29%
|
|===================== | 32%
|
|======================= | 35%
|
|========================= | 39%
|
|=========================== | 42%
|
|============================= | 45%
|
|=============================== | 48%
|
|================================== | 52%
|
|==================================== | 55%
|
|====================================== | 58%
|
|======================================== | 61%
|
|========================================== | 65%
|
|============================================ | 68%
|
|============================================== | 71%
|
|================================================ | 74%
|
|================================================== | 77%
|
|==================================================== | 81%
|
|======================================================= | 84%
|
|========================================================= | 87%
|
|=========================================================== | 90%
|
|============================================================= | 94%
|
|=============================================================== | 97%
|
|=================================================================| 100%
[1] "Scaling data matrix"
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
bipolar.seurat.raw <- NULL
class:small-code
dim(bipolar.seurat@raw.data)
[1] 24904 44994
class:small-code
dim(bipolar.seurat@data)
[1] 14575 30673
class:small-code
head(bipolar.seurat@data.info)
nGene nUMI orig.ident
Bipolar1_CCCACAAGACTA 3468 8083 Bipolar1
Bipolar1_TCGCCTCGTAAG 4495 4541 Bipolar1
Bipolar1_CAAAGCATTTGC 1674 3131 Bipolar1
Bipolar1_CTTTTGATTGAC 1727 3321 Bipolar1
Bipolar1_GCTCCAATGACA 1845 3468 Bipolar1
Bipolar1_AAATACCCTCAT 1635 3004 Bipolar1
class:small-code
- Filtering on mitochondrial reads in Seurat.
# Get gene names
mito.gene.names <- grep("^mt-", rownames(bipolar.seurat@data), value=TRUE)
class:small-code
- Filtering on mitochondrial reads in Seurat.
# Get TSS normalized mitochodrial counts
# Cell total expression
col.total.counts <- Matrix::colSums(expm1(bipolar.seurat@data))
# Scale each count by the cell total
mito.percent.counts <- Matrix::colSums(expm1(bipolar.seurat@data[mito.gene.names, ]))/col.total.counts
class:small-code
- Filtering on mitochondrial reads in Seurat.
# Add to seurat object as a metadata
bipolar.seurat <- AddMetaData(bipolar.seurat, mito.percent.counts, "percent.mitochodrial")
# Check
head(bipolar.seurat@data.info)
nGene nUMI orig.ident percent.mitochodrial
Bipolar1_CCCACAAGACTA 3468 8083 Bipolar1 0.025170490
Bipolar1_TCGCCTCGTAAG 4495 4541 Bipolar1 0.002911534
Bipolar1_CAAAGCATTTGC 1674 3131 Bipolar1 0.029411765
Bipolar1_CTTTTGATTGAC 1727 3321 Bipolar1 0.031975867
Bipolar1_GCTCCAATGACA 1845 3468 Bipolar1 0.020809249
Bipolar1_AAATACCCTCAT 1635 3004 Bipolar1 0.024333333
class:small-code
- Plot current metadata in Seurat Object.
- Number gene.
- Number UMI.
- Percent mitochondrial counts (batch affect).
VlnPlot(bipolar.seurat, "nGene")
VlnPlot(bipolar.seurat, "nUMI")
VlnPlot(bipolar.seurat, "percent.mitochodrial")
VlnPlot(bipolar.seurat, c("nGene", "nUMI", "percent.mitochodrial"), nCol=3)
class:small-code
class:small-code
class:small-code
class:small-code
class:small-code
- Outlier percent mitochondria are very low expression.
- Very high expression have low percent mitochondrial reads.
GenePlot(bipolar.seurat, "nUMI", "percent.mitochodrial")
class:small-code
- Outlier percent mitochondria are very low expression.
- Very high expression have low percent mitochondrial reads.
class:small-code
# Filter libraries with more than 10% mitochondrially derived transcripts
# From 14575 x 30673 to 14575 x 27489
dim(bipolar.seurat@data)
[1] 14575 30673
# 1 Minute
# Filter max complexity
bipolar.seurat <- SubsetData(bipolar.seurat, subset.name="nGene", accept.high=3000)
# Filter percent percent mitochondrial reads
bipolar.seurat <- SubsetData(bipolar.seurat, subset.name="percent.mitochodrial", accept.high=0.1)
class:small-code
dim(bipolar.seurat@data)
[1] 14575 27489
table(bipolar.seurat@data.info[["orig.ident"]])
Bipolar1 Bipolar2 Bipolar3 Bipolar4 Bipolar5 Bipolar6
3053 3419 3661 3849 7127 6380
# Original standard matrix 8,971,430,520 bytes
object.size(bipolar.seurat)
3910195000 bytes
class:small-code
Saving the Seurat object.
- Contains all manipulation so far.
- Can be loaded or shared.
- Does not contain environment.
# 3.16 GB
# save(bipolar.seurat, file="./data/bipolar_seurat.Robj")
# load("./data/bipolar_seurat.Robj")
class:small-code
You may need to export data to import into other applications.
# Log-scale expression matrix
# 5 minutes (1.2 GB)
# write.table(as.matrix(bipolar.seurat@data), file="bipolar_data.txt")
# Study metadata (1.2 MB)
# write.table([email protected], file="bipolar_metadata.txt")
class:small-code
Plotting genes across groups.
- Check the identity of the cells!!!
VlnPlot(bipolar.seurat, "Prkca")
VlnPlot(bipolar.seurat, "Prkca", group.by="orig.ident")
class:small-code
- Notice many zeros.
class:small-code
class:small-code
# Plot a gene vs a gene
GenePlot(bipolar.seurat,"Isl1","Grm6",cex.use=1)
class:small-code
class:small-code
# Create batch affect (5 and 6 as a group)
batch.effect <- technical_batches[row.names(bipolar.seurat@data.info)]
batch.effect[batch.effect %in% c(1,2,3,4)] <- 1
batch.effect[batch.effect %in% c(5,6)] <- 2
# Add batch affect
bipolar.seurat <- AddMetaData(bipolar.seurat, batch.effect, "batch.effect")
# Regress on metadata
bipolar.seurat <- RegressOut(bipolar.seurat, latent.vars="batch.effect")
[1] "Regressing out batch.effect"
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
[1] "Scaling data matrix"
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
class:small-code
- We are going to focus on highly variable genes.
- Average expression (X) and dispersion (SD)(Y).
- Bins genes (20).
- Z-score for dispersion.
- fxn.x and fxn.y allow one to change the measurements.
- Cut.offs are high and low, X and Y.
# 2 minutes
bipolar.seurat <- MeanVarPlot(bipolar.seurat,
fxn.x=expMean,
fxn.y=logVarDivMean,
x.low.cutoff=0.0125,
x.high.cutoff=3,
y.cutoff=0.5, do.contour=FALSE,
do.plot=FALSE)
[1] "Calculating gene dispersion"
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
length(bipolar.seurat@var.genes)
[1] 2285
class:small-code
- Calculating PCA with the highly variable genes.
# 9 minutes
bipolar.seurat <- PCA(bipolar.seurat,
pc.genes=bipolar.seurat@var.genes,
do.print=FALSE)
# save(bipolar.seurat, file="./data/bipolar_seurat_pca.Robj")
class:small-code
# Calculate PCA projection
bipolar.seurat <- ProjectPCA(bipolar.seurat)
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
[1] "PC1"
[1] "Cplx3" "Snap25" "Gng13" "Syt1" "Trpm1"
[6] "Gnao1" "Mir124-2hg" "Calm1" "Lrtm1" "Pcp2"
[11] "Atp2b1" "Nme1" "Otx2" "Isl1" "Gnb3"
[16] "B3galt2" "Chgb" "Pcp4" "Neurod4" "Nap1l5"
[21] "Map4" "Nsf" "Cabp5" "Scg2" "Trnp1"
[26] "Aplp2" "Meg3" "Unc119" "Sncb" "mt-Cytb"
[1] ""
[1] "Dkk3" "Rlbp1" "Slc1a3" "Apoe" "Mfge8" "Cd9" "Car14"
[8] "Cp" "Ptn" "Sparc" "Spc25" "Aqp4" "Glul" "Crym"
[15] "Col9a1" "Prdx6" "Clu" "Hes1" "Timp3" "Pdpn" "Gpr37"
[22] "Fxyd1" "Egr1" "Jun" "Kdr" "Dbi" "Tsc22d4" "Gpm6b"
[29] "Espn" "Dapl1"
[1] ""
[1] ""
[1] "PC2"
[1] "Prkca" "Car8" "Ablim1" "Chgb" "Pcp2" "Calm1"
[7] "Vstm2b" "Trpm1" "Ccdc136" "Qpct" "Strip2" "Ift20"
[13] "Sebox" "Mt1" "Pcp4" "Tpbg" "Adrb1" "Zbtb20"
[19] "Adamts5" "Kcne2" "Isl1" "Rpa1" "Casp7" "Cacna2d3"
[25] "Tgfb2" "Gng13" "Lin7a" "Gpr179" "Mt2" "Cep112"
[1] ""
[1] "App" "Scgn" "Gria2" "Lbh"
[5] "Gucy1a3" "Samsn1" "Sncb" "Ptprz1"
[9] "Gsg1" "Fam19a3" "Tubb2a" "Snhg11"
[13] "Gngt1" "Cadm3" "Gngt2" "Slc24a3"
[17] "A530058N18Rik" "Pcdh9" "Rpgrip1" "A730046J19Rik"
[21] "Zeb2" "Slitrk6" "Cxxc5" "Fezf2"
[25] "Drd1" "Cplx4" "Zfp804b" "Trib2"
[29] "Otor" "Lhx4"
[1] ""
[1] ""
[1] "PC3"
[1] "Cdh9" "Neurod2" "Lmo4" "BC046251"
[5] "Nfia" "Medag" "Hs3st4" "Hpca"
[9] "Cplx4" "Gas6" "Meis2" "Sulf2"
[13] "Atp2b1" "St18" "Vipr2" "Gm10605"
[17] "Kcng4" "Gnb3" "Epha7" "Gng13"
[21] "Ptprt" "1810041L15Rik" "Gucy1a3" "Gm4792"
[25] "Cxcl14" "Adcy2" "Grm6" "RP23-363M4.1"
[29] "Isl1" "Sox6"
[1] ""
[1] "Slit2" "Rcvrn" "Tacr3" "A730046J19Rik"
[5] "Zfhx4" "Rpgrip1" "A930003A15Rik" "Pdc"
[9] "Glra1" "Cdh8" "Gnat1" "Fezf2"
[13] "Rp1" "Nrl" "Rs1" "Esam"
[17] "Rho" "Gnb1" "Sag" "Pde6b"
[21] "Pde1a" "Slc24a1" "Nxph1" "Guca1b"
[25] "Slitrk6" "Aipl1" "Nr2e3" "Neto1"
[29] "Guk1" "Phyhipl"
[1] ""
[1] ""
[1] "PC4"
[1] "Tacr3" "Slit2" "Zfhx4" "A730046J19Rik"
[5] "Glra1" "Nxph1" "Cdh8" "Gabra1"
[9] "Neto1" "Cabp1" "Lhx3" "Pde1a"
[13] "Wls" "Ncam2" "Fezf2" "Meg3"
[17] "Lamp5" "Scg2" "Gabrb1" "Esam"
[21] "Scgn" "Tmeff2" "Slitrk6" "Camk4"
[25] "Fam19a3" "Phyhipl" "A330008L17Rik" "Casp3"
[29] "Guk1" "Pcdh10"
[1] ""
[1] "Pdc" "Gnat1" "Rp1" "Rs1" "Sag" "Rho"
[7] "Pde6b" "Nr2e3" "Slc24a1" "Guca1b" "Nrl" "Pde6g"
[13] "Cnga1" "Guca1a" "Rcvrn" "Rtbdn" "Pde6a" "Nxnl1"
[19] "Aipl1" "Prph2" "Rdh12" "Tulp1" "Gngt1" "Samd11"
[25] "AI847159" "Reep6" "Gnb1" "Pex5l" "Rgs9" "Vtn"
[1] ""
[1] ""
[1] "PC5"
[1] "Gsg1" "Pcp4" "Otor" "Nnat"
[5] "Grik1" "Fezf2" "Fam19a3" "A930003A15Rik"
[9] "Phyhipl" "Lhx4" "Cabp5" "Dner"
[13] "Kcnip4" "Lrrtm3" "Nfib" "Cacna2d2"
[17] "A930009A15Rik" "Cacng2" "Prkar2b" "Sphkap"
[21] "Gngt1" "Cntn4" "Rpgrip1" "Gng2"
[25] "Cacna2d1" "Nxph3" "Cnnm1" "Lbh"
[29] "Slc16a7" "Slc24a3"
[1] ""
[1] "Vsx1" "Pcp4l1" "Cck" "Cdh8"
[5] "Reln" "Tnnt1" "Lect1" "Gjd2"
[9] "Zfhx4" "Scg2" "Pcdh9" "Lhx3"
[13] "A530058N18Rik" "Spock3" "A330008L17Rik" "Sec1"
[17] "Six3" "Kcnab1" "Chrna6" "Igfn1"
[21] "Camkv" "Guk1" "Gng13" "Syt2"
[25] "Adra2c" "Mkx" "Smoc2" "Igf1"
[29] "Gria2" "Fn1"
[1] ""
[1] ""
class:small-code
# Can plot top genes for top components
PrintPCA(bipolar.seurat,
pcs.print=1:2,
genes.print=5,
use.full=TRUE)
[1] "PC1"
[1] "Cplx3" "Snap25" "Gng13" "Syt1" "Trpm1"
[1] ""
[1] "Dkk3" "Rlbp1" "Slc1a3" "Apoe" "Mfge8"
[1] ""
[1] ""
[1] "PC2"
[1] "Prkca" "Car8" "Ablim1" "Chgb" "Pcp2"
[1] ""
[1] "App" "Scgn" "Gria2" "Lbh" "Gucy1a3"
[1] ""
[1] ""
class:small-code
- Top 30 genes associated with the first two components.
VizPCA(bipolar.seurat, pcs.use=1:2)
class:small-code
class:small-code
How can we use component loadings?
- Can check components to see if genes show up that are enriched in components.
- Using the scores you can perform enrichment analysis to decribe the component.
class:small-code
PCAPlot(bipolar.seurat, 1, 2)
class:small-code
class:small-code
- Plot top 30 genes in top 100 cells for PC1.
PCHeatmap(bipolar.seurat, pc.use=1, cells.use=100, do.balanced=TRUE)
class:small-code
- Plot top 30 genes in top 100 cells for PC1.
class:small-code
# Time Intensive
# Jackstraw
# bipolar.seurat <- JackStraw(bipolar.seurat, num.replicate = 100, do.print = FALSE)
class:small-code
How do we choose how many components to use?
- When there is diminishing returns to include it.
- Selection is performed more liberally in our setting.
# Time Efficient but more ad hoc
# Scree (elbow) plot
# 37 selected
PCElbowPlot(bipolar.seurat, num.pc=40)
class:small-code
class:small-code
- Calculate and plot t-SNE on PC 1- 37.
- Can use Barnes-hut implementation.
# Calculate t-SNE Ordination
# 7 minutes
bipolar.seurat <- RunTSNE(bipolar.seurat,
dims.use=1:37,
do.fast=TRUE)
class:small-code
# Plot
TSNEPlot(bipolar.seurat)
class:small-code
class:small-code
This is not PCA
- Distance is best understood in close neighbors.
- The measurement of distance is difficult to understand.
class:small-code
class:small-code
- Determine subclusters for the plot.
- Separate graph based approach.
- is not aware of the t-SNE projection.
- Using PC 1-37
# 14 minutes
# bipolar.seurat <- FindClusters(bipolar.seurat,
# pc.use=1:37,
# resolution=0.6,
# print.output=0,
# save.SNN=TRUE)
# save(bipolar.seurat, file="./data/bipolar_cluster_seurat.Robj")
load("./data/bipolar_cluster_seurat.Robj")
class:small-code
TSNEPlot(bipolar.seurat)
class:small-code
class:small-code
TSNEPlot(bipolar.seurat, do.label=TRUE)
class:small-code
class:small-code
Now that we have subclusters of cell populations, plotting genes through subclusters is identical to before.
- Seurat stores and groups by subclusters automatically.
VlnPlot(bipolar.seurat, "Prkca")
class:small-code
class:small-code
VlnPlot(bipolar.seurat, "Glul")
class:small-code
class:small-code
You can also gene expression through out the cell ordination.
- Marker genes can help identify cell groups.
- Rod Bipolar Cells (Prkca+ Scgn-).
- Muller Glia (Glul+ Prkca- Scgn-).
- Cone Bipolar Cells (Prkca- Scgn+).
- On Cone Bipolar Cells (Prkca- Scgn+ Grm6+).
- Metadata can help visualize batch affects.
FeaturePlot(bipolar.seurat,
c("Prkca","Glul", "Scgn", "Grm6"),
cols.use = c("grey","blue"))
class:small-code
class:small-code
FeaturePlot(bipolar.seurat,
"nGene",
cols.use = c("grey","blue"))
FeaturePlot(bipolar.seurat,
"percent.mitochodrial",
cols.use = c("grey","blue"))
class:small-code
class:small-code
class:small-code
Check for your batch affect.
- We are going to make a fake batch affect (site) and plot this as an example of how one can visualize unwanted signal.
# Making Fake Data
fake.sites <- as.integer(bipolar.seurat@ident %in% c(0))
names(fake.sites) <- names(bipolar.seurat@ident)
# Add metadata
bipolar.seurat <- AddMetaData(bipolar.seurat, fake.sites, "site")
# Plot feature
FeaturePlot(bipolar.seurat, c("site"), cols.use = c("green","orange"))
class:small-code
class:small-code
cell.labels <- bipolar.seurat@ident
corner(cell.labels)
class:small-code
Bipolar1_CAAAGCATTTGC Bipolar1_CTTTTGATTGAC Bipolar1_GCTCCAATGACA
5 4 0
Bipolar1_AAATACCCTCAT Bipolar1_TGCATGCGTCCA
3 5
Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
class:small-code
- Default is one cluster against many tests.
- Can specify an ident.2 test between clusters.
- Adding speed by excluding tests.
- Min.pct - controls for sparsity.
- Min percentage in a group.
- Thresh.test - must have this difference in averages.
- Min.pct - controls for sparsity.
# 2 minutes
cluster5.markers <- FindMarkers(bipolar.seurat, ident.1=5, min.pct = 0.25)
head(cluster5.markers, 30)
class:small-code
p_val avg_diff pct.1 pct.2
Cck 0 2.742669 0.692 0.008
Pcp4 0 -2.173863 0.170 0.653
Pcp2 0 -2.165046 0.294 0.742
Glul 0 -2.137603 0.161 0.310
Lect1 0 2.068691 0.547 0.032
Acsl3 0 -1.827424 0.160 0.417
Mt1 0 -1.754321 0.093 0.511
Tnnt1 0 1.749033 0.604 0.085
Prkca 0 -1.711193 0.053 0.383
Vsx1 0 1.686164 0.713 0.108
Gngt2 0 1.630449 0.694 0.124
Pcp4l1 0 1.600984 0.701 0.155
Cabp5 0 -1.586463 0.197 0.614
Scgn 0 1.578928 0.914 0.275
Scg2 0 1.571709 0.982 0.549
Unc13c 0 1.537642 0.550 0.105
Ablim1 0 -1.453118 0.085 0.405
Cdh8 0 1.450262 0.494 0.058
Spock3 0 1.397758 0.328 0.017
Gjd2 0 1.391591 0.526 0.103
Guk1 0 1.375674 0.850 0.349
Mt2 0 -1.270403 0.055 0.320
Ccdc136 0 -1.270049 0.086 0.409
Cntn4 0 -1.252449 0.028 0.278
Six3 0 1.216624 0.546 0.143
Car8 0 -1.175168 0.170 0.420
Zfhx4 0 1.171140 0.482 0.103
A530058N18Rik 0 1.159338 0.504 0.130
Ptprz1 0 1.152080 0.581 0.173
Lhx3 0 1.106063 0.299 0.029
class:small-code
- Find all cluster against all others.
# bipolar.markers <- FindAllMarkers(bipolar.seurat, only.pos=TRUE, min.pct=0.25, thresh.use=0.25)
# bipolar.markers %>% group_by(cluster) %>% top_n(2, avg_diff)
class:small-code
bimod: Tests differences in mean and proportions.
roc: Uses AUC like definition of separation.
t: Student's T-test.
tobit: Tobit regression on a smoothed data.
class:small-code
plot.genes <- head(rownames(cluster5.markers),30)
DoHeatmap(bipolar.seurat,
genes.use=plot.genes,
order.by.ident=TRUE,
slim.col.label=TRUE,
remove.key=TRUE)
class:small-code
class:small-code
dot.plot(bipolar.seurat,
features.use=plot.genes)
class:small-code
class:small-code
# Genes in tutorial
plot.genes = c("Vsx2","Otx2","Scgn","Isl1","Grm6",
"Apoe","Pax6","Rho","Arr3","Tacr3",
"Syt2","Neto1","Irx6","Prkar2b",
"Grik1","Kcng4","Cabp5","Vsx1","Prkca")
# Cell groups shown
only.use.groups = c(7,9,10,12,8,14,3,13,6,11,5,4,16,0)
# Plot
dot.plot(bipolar.seurat,
features.use=plot.genes,
subset.group=only.use.groups)
class:small-code
class:small-code
# Make a covariate to inform the test
test.5 <- as.character(bipolar.seurat@ident)
test.5[test.5 != "5"] <- "wild"
test.5[test.5 == "5"] <- "test"
names(test.5) <- names(bipolar.seurat@ident)
test.5 <- factor(test.5, levels=c("wild","test"))
class:small-code
# Load into Mast Object
# Mast object
# Data frame of cell-level covariates
# Data frame of feature-level covariates
bipolar.mast <- FromMatrix(as.matrix(bipolar.seurat@data),
cbind(bipolar.seurat@data.info,test.5),
data.frame(primerid=rownames(bipolar.seurat@data)))
class:small-code
- Create the model, here we load data for time.
- Perform test for our contrast.
- Format results for easier use.
# Run test
# 1 hr
# zlm.test.5 <- zlm.SingleCellAssay(~ test.5, bipolar.mast)
load("./data/mast_zlm.Robj")
df.test.5 <- data.frame(summary(zlm.test.5, doLRT="test.5test"))
summary.mast <- makeNice(df.test.5, val="test.5test")
class:small-code
- Results from test.
- Some overlapping genes.
summary.mast[abs(summary.mast[,5])>=1 & !is.na(summary.mast[,5]),]
Gene pval padj padj_strict logFC_test.5test
Cck Cck 0.000000e+00 0.000000e+00 0.000000e+00 2.037136
Cdh8 Cdh8 0.000000e+00 0.000000e+00 0.000000e+00 1.115389
Gjd2 Gjd2 0.000000e+00 0.000000e+00 0.000000e+00 1.105904
Gngt2 Gngt2 0.000000e+00 0.000000e+00 0.000000e+00 1.650368
Guk1 Guk1 0.000000e+00 0.000000e+00 0.000000e+00 1.721112
Lect1 Lect1 0.000000e+00 0.000000e+00 0.000000e+00 1.415152
Pcp2 Pcp2 0.000000e+00 0.000000e+00 0.000000e+00 -2.313532
Pcp4 Pcp4 0.000000e+00 0.000000e+00 0.000000e+00 -1.928654
Pcp4l1 Pcp4l1 0.000000e+00 0.000000e+00 0.000000e+00 1.598591
Ptprz1 Ptprz1 0.000000e+00 0.000000e+00 0.000000e+00 1.103493
Scg2 Scg2 0.000000e+00 0.000000e+00 0.000000e+00 2.396354
Scgn Scgn 0.000000e+00 0.000000e+00 0.000000e+00 2.306762
Six3 Six3 0.000000e+00 0.000000e+00 0.000000e+00 1.053435
Tnnt1 Tnnt1 0.000000e+00 0.000000e+00 0.000000e+00 1.450846
Unc13c Unc13c 0.000000e+00 0.000000e+00 0.000000e+00 1.231391
Vsx1 Vsx1 0.000000e+00 0.000000e+00 0.000000e+00 1.851497
Cabp5 Cabp5 2.231731e-257 1.282129e-254 1.296080e-253 -1.417557
Isl1 Isl1 4.754931e-192 2.114870e-189 2.137881e-188 1.313532
Mt1 Mt1 2.249854e-191 9.694058e-189 9.799535e-188 -1.228030
Gabra1 Gabra1 7.918514e-190 3.211190e-187 3.246130e-186 1.083016
class:small-code
plot.genes = c("Scg2","Scgn","Cck","Vsx1","Guk1",
"Gngt2","Pcp4l1","Pcp4","Pcp2")
dot.plot(bipolar.seurat,
features.use=plot.genes)