-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcircbase_comparison.Rmd
83 lines (73 loc) · 2.9 KB
/
circbase_comparison.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
---
title: "Comparison to CircBase"
author: Tomás Germade
date: July 17, 2019
output: html_document
---
```{r setup, include=FALSE}
# Default for all chunks: show (echo) the chunk source in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
```
```{r library}
suppressPackageStartupMessages({
library(GenomicRanges)
library(rtracklayer)})
```
```{r setwd, warning=FALSE}
# Base directory against which the relative paths in later chunks are resolved.
project_dir <- "~/Documents/R_stuff/"
# knitr restores the working directory once a chunk has finished, so a bare
# setwd() here does not carry over to later chunks when the document is knit.
# Setting root.dir makes knitr evaluate the following chunks in project_dir.
knitr::opts_knit$set(root.dir = project_dir)
# When the chunks are run by hand (not through knit), switch directory as
# before so interactive sessions keep working.
if (!isTRUE(getOption("knitr.in.progress"))) {
  setwd(project_dir)
}
```
# Import
```{r import}
# Column names applied to both 12-column BED tables read below.
bed_columns <- c(
  "Chr", "Start", "End", "Name", "Counts", "Strand",
  "ExonStart", "ExonEnd", "Color", "ExonNr", "ExonLen", "RelExonStart"
)

# DCC datasets: tab-separated CircRNACount tables, first line used as header.
ribo_dcc <- read.table(
  "/mnt/ins/mouse_ribozero_hippocampus/circtools/01_detect/mouse_ribozero_hippocampus/output/CircRNACount",
  header = TRUE, sep = "\t"
)
rnaser_dcc <- read.table(
  "/mnt/ins/mouse_RNAseR_hippocampus/circtools/01_detect/mouse_RNAseR_hippocampus/output/CircRNACount",
  header = TRUE, sep = "\t"
)

# BED datasets.
# RNAseR exon-count BED: read without a header line, columns named afterwards.
rnaser_bed <- read.table(
  "/mnt/schratt/tgermade_test/salmon/mouse_RNAseR_hippocampus/1746_combined.exon_counts.bed",
  header = FALSE, sep = "\t"
)
names(rnaser_bed) <- bed_columns

# CircBase BED: the first line is consumed as a header and the resulting
# column names are then overwritten with bed_columns.
# NOTE(review): confirm the file really starts with a header line; if it does
# not, header = TRUE silently drops the first record.
circbase_bed <- read.table(
  "../Schratt_Lab/circbase_mouse_mm10.bed",
  header = TRUE, sep = "\t"
)
names(circbase_bed) <- bed_columns
```
# FUCHS quantification
## Process
```{r adjust rnaser coordinates}
# Shift the RNAseR BED start coordinates down by one so they are compatible
# with the CircBase BED starts.
# Use the integer literal 1L: subtracting the double 1 would convert the whole
# column to double, and paste()/as.character() print round doubles in
# scientific notation (100000 -> "1e+05"). Such keys would never equal the
# integer-formatted CircBase key ("100000") in the text comparison that
# follows, silently losing matches.
rnaser_bed$Start <- rnaser_bed$Start - 1L
```
```{r extract rnaser coordinates}
# Build one text key per row, "<Chr> <Start> <End>" (space-separated), for the
# RNAseR BED and the CircBase BED. The keys are later compared as plain text,
# so Start/End must print identically on both sides (e.g. a double 1e5 is
# pasted as "1e+05", an integer 100000L as "100000").
# NOTE(review): Strand is not part of the key - confirm that is intended.
c_rnaser_bed <- with(rnaser_bed, paste(Chr, Start, End))
c_circbase_bed <- with(circbase_bed, paste(Chr, Start, End))
```
## Analysis
```{r bed matches}
# Match RNAseR BED circles against CircBase by their "Chr Start End" key.
# m_bed: for every RNAseR BED row, the row indices of the CircBase entries
# with the same key (integer(0) when there is no hit).
m_bed <- lapply(c_rnaser_bed, function(key) which(c_circbase_bed == key))
# Distribution of the number of CircBase hits per RNAseR BED row.
table(lengths(m_bed))
# Fraction of CircBase entries whose coordinates occur in the RNAseR BED.
# Counted on the CircBase side so that numerator and denominator refer to the
# same table; indexing the table above by position ([2]) is wrong whenever the
# "0 hits" bucket is absent and ignores rows with more than one hit.
sum(c_circbase_bed %in% c_rnaser_bed) / nrow(circbase_bed)
```
**Result:** we find 65% of CircBase transcripts in our RNAseR quantification (FUCHS).
# DCC quantification
## Process
```{r adjust dcc coordinates}
# Shift the DCC start coordinates (RiboZero and RNAseR) down by one so they
# are compatible with the CircBase BED starts.
# Use the integer literal 1L: subtracting the double 1 would convert the
# columns to double, and paste()/as.character() print round doubles in
# scientific notation (100000 -> "1e+05"). Such keys would never equal the
# integer-formatted CircBase key ("100000") in the text comparison that
# follows, silently losing matches.
ribo_dcc$Start <- ribo_dcc$Start - 1L
rnaser_dcc$Start <- rnaser_dcc$Start - 1L
```
```{r extract dcc coordinates}
# Build one text key per DCC row, "<Chr> <Start> <End>" (space-separated).
# These keys are compared as plain text against the CircBase keys.
# NOTE(review): Strand is not part of the key - confirm that is intended.
dcc_coord_key <- function(counts) {
  paste(counts$Chr, counts$Start, counts$End)
}
c_ribo_dcc <- dcc_coord_key(ribo_dcc)
c_rnaser_dcc <- dcc_coord_key(rnaser_dcc)
```
## Analysis
```{r dcc matches}
# Match DCC circles against CircBase by their "Chr Start End" key.
# m_*_dcc: for every DCC row, the row indices of the CircBase entries with the
# same key (integer(0) when there is no hit).
m_ribo_dcc <- lapply(c_ribo_dcc, function(key) which(c_circbase_bed == key))
m_rnaser_dcc <- lapply(c_rnaser_dcc, function(key) which(c_circbase_bed == key))
# Distribution of the number of CircBase hits per DCC row.
table(lengths(m_ribo_dcc))
table(lengths(m_rnaser_dcc))
# Fraction of CircBase entries whose coordinates occur in each DCC table.
# Counted on the CircBase side so that numerator and denominator refer to the
# same table; indexing the tables above by position ([2]) is wrong whenever
# the "0 hits" bucket is absent and ignores rows with more than one hit.
sum(c_circbase_bed %in% c_ribo_dcc) / nrow(circbase_bed)
sum(c_circbase_bed %in% c_rnaser_dcc) / nrow(circbase_bed)
```
- **Result:** we find 11% of CircBase transcripts in the RiboZero DCC quantification.
- **Result:** we find 99% of CircBase transcripts in the RNAseR DCC quantification.