-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLOH_NCIMD.R
68 lines (51 loc) · 2.49 KB
/
LOH_NCIMD.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#CHAS
Phase1=read.csv('/home/sinhas8/Downloads/Segment_CHAS_Phase1.segment.txt', sep='\t')
Phase2=read.csv('/home/sinhas8/Downloads/P2_Initial_Data/CHAS_Segements_Try1.csv.txt', sep='\t')
Phase1=Phase1[,c('Use.In.Export' , 'File' , 'CN.State' , 'Type' , 'Chromosome' , 'Full.Location')]
Phase2=Phase2[,c('Use.In.Export' , 'File' , 'CN.State' , 'Type' , 'Chromosome' , 'Full.Location')]
#OncoScan
Phase1=read.csv('/home/sinhas8/Downloads/From_OncoScan_COnsole/Segment_OncoScan_Phase1.segment.txt', sep='\t')
Phase2=read.csv('/home/sinhas8/Downloads/From_OncoScan_COnsole/Segment_OncoScan_Phase2.segment.txt', sep='\t')
both=rbind(Phase1, Phase2)
both=both[both$Type=='TotalCN',]
colnames(both)
mat=read.csv('/home/sinhas8/Projects/Project_Chromotrypsis/2.Data/mat_Nov09.csv')
Normal_files=levels(both$File)[is.na(match(levels(both$File), mat$File))]
both_filtered=both[-which(!is.na(match(both$File, Normal_files))),]
both_list=split(both_filtered, both_filtered$File)
both_list=both_list[-which(sapply(both_list, nrow)==0)]
LOH=data.frame(sapply(both_list, function(x) Calculate_LOH(x)))
colnames(LOH)=c('Count_LOH')
mat=
mat$LOH=LOH
Calculate_LOH<-function(cnv){
# Count_LOH=
sum(cnv$Type=='LOH')
# LOH_Prop=sum(cnv$StopPosition[cnv$Type=='LOH']-cnv$StartPosition[cnv$Type=='LOH'])/(3.3*10^9)
# c(Count_LOH, LOH_Prop)
}
###Plotting LOH across our cohort
tiff('/home/sinhas8/Projects/Project_Chromotrypsis/NCIMD_HRD_byLOH.tiff')
theme_set(theme_gray(base_size = 18))
ggplot(mat, aes(y=LOH, x=hist, fill=race))+geom_boxplot()+
stat_compare_means(label = "p.signif", method = "wilcox"
,method.args = list(alternative = "l")
)+
labs(x='Histology', y='LOH Segments Counts')
dev.off()
###############
#CNV burden
###############
both$FileName=as.character(both$FileName)
CNV_chromosome_burden<-function(both){
both_byfile=split(both,both$FileName)
both_byfileandchr=lapply(both_byfile, function(x) split(x, x$Chromosome))
CNV_burden=sapply(both_byfileandchr, function(x) sum(sapply(x, function(y)
sum(y$StopPosition[y$State!= 2]-y$StartPosition[y$State!= 2])))/(3.3*10^9) )
Loss_burden=sapply(both_byfileandchr, function(x) sum(sapply(x, function(y)
sum(y$StopPosition[y$State< 2]-y$StartPosition[y$State< 2])))/(3.3*10^9) )
Gain_burden=sapply(both_byfileandchr, function(x) sum(sapply(x, function(y)
sum(y$StopPosition[y$State> 2]-y$StartPosition[y$State> 2])))/(3.3*10^9) )
data.frame(CNV_burden, Gain_burden, Loss_burden)
}
GI=CNV_chromosome_burden(both)