-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsample_report.Rmd
126 lines (94 loc) · 4.23 KB
/
sample_report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
``` {r setup, echo=FALSE, message=FALSE, include=FALSE, error=FALSE}
library(GenomicRanges, warn.conflicts=F)
library(Rsamtools, warn.conflicts=F)
library(GenomicAlignments)
library(magrittr)
library(parallel)
setwd("/data/analysis_code/")
options(knitr.figure_dir = "sample_report")
source("shared_code/knitr_common.r")
source("shared_code/granges_common.r")
```
# Sample report
**Author:** [Wanqing Shao](mailto:[email protected])
**Generated:** `r format(Sys.time(), "%a %b %d %Y, %I:%M %p")`
## Overview
Generate alignment report, report contains raw sequencing reads, reads containing fixed barcode (only applicable to ChIP-nexus samples), aligned reads and uniquely aligned reads. If two replicates were performed, calculate R2 values between replicates by performing linear regression on the read counts that fall within each mRNA promoter region.
### Generate alignment report
```{r alignment_report}
sample_list <- read.csv("./sample_list.csv", stringsAsFactors = F)
status_report <- function(name , paired=F){
sample_info <- sample_list[sample_list$sample_name == name,]
path_to_fastq <- sample_info$raw_fastq
path_to_processed_fastq <- sample_info$preprocessed_fastq
path_to_bam <- sample_info$bam
path_to_granges <- sample_info$granges
message(" Counting original FASTQ reads...")
original_fastq_count <- as.integer(system(paste("zcat", path_to_fastq, "| wc -l"), intern=T)) / 4
message(" Counting processed FASTQ reads...")
if(nchar(path_to_processed_fastq) ==0){
processed_fastq_count <- NA
}else{
processed_fastq_count <- as.integer(system(paste("zcat", path_to_processed_fastq, "| wc -l"), intern=T)) / 4
}
message(" Counting total aligned reads...")
if(paired) {
bam_count <- length(readGAlignmentPairs(as.character(path_to_bam)))
} else {
bam_count <- length(readGAlignments(as.character(path_to_bam)))
}
message(" Counting uniquely aligned reads...")
if(sample_info$data_type == "chip_nexus"){
granges_count <- length(readRDS(path_to_granges))
}else{
granges_count <- length(get(load(path_to_granges)))
}
status.df <- data.frame(sample = name, raw_reads = original_fastq_count,
pf_reads= processed_fastq_count, aligned_reads=bam_count,
uniquely_aligned=granges_count)
status.df
}
samples <- sample_list$sample_name
status_report_list <- cache("alignment_report.rds", function(){
mclapply(samples, function(x)status_report(x), mc.cores=10)
})
status_report_df <- do.call(rbind, status_report_list)
rownames(status_report_df) <- NULL
pander(status_report_df, "alignment report")
```
### Checking the consistency between replicates
```{r checking_replicate}
tss <- get(load("./rdata/dme_mrna_unique_tss.RData"))
checking_replicate <- function(name){
sample_info <- sample_list[sample_list$short_name == name,]
path_to_granges <- sample_info$granges
rds1 <- readRDS(as.character(path_to_granges[1]))
rds2 <- readRDS(as.character(path_to_granges[2]))
if(length(grep("spikein",path_to_granges)) ==2){
seqlevels(rds1, force=T) <- grep("dm3", seqlevels(rds1), value=T)
seqlevels(rds1, force=T) <- gsub("dm3_","", seqlevels(rds1))
seqlevels(rds2, force=T) <- grep("dm3", seqlevels(rds2), value=T)
seqlevels(rds2, force=T) <- gsub("dm3_","", seqlevels(rds2))
}
cov1 <- resize(rds1, 1, "start") %>% coverage(.)
cov2 <- resize(rds2, 1, "start") %>% coverage(.)
rep1 <- resize(tss, 201, "center") %>% regionSums(., cov1)
rep2 <- resize(tss, 201, "center") %>% regionSums(., cov2)
r2 <- summary(lm(rep1 ~rep2))$r.square
df <- data.frame(sample_name =sample_info$sample_name , r_squared = round(r2, digit=3))
df
}
sample_with_rep <- subset(sample_list, nchar(replicate) > 1)$short_name %>% unique(.)
consistency_report_list <- cache("consistency_report1.rds", function(){
mclapply(sample_with_rep, function(x)checking_replicate(x), mc.cores=4)
})
consistency_report_df <- do.call(rbind, consistency_report_list)
colnames(consistency_report_df) <- c( "sample","r_squared" )
rownames(consistency_report_df) <- NULL
pander(consistency_report_df, "consistency report")
report.df <- merge(status_report_df, consistency_report_df)
write.table(report.df, file="/data/analysis_code/sample_report.txt")
```
```{r}
sessionInfo()
```