-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_gene_level_cnv_by_case.sh
executable file
·50 lines (47 loc) · 1.48 KB
/
get_gene_level_cnv_by_case.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/bin/bash
## get gene-level copy number values
## modified from
mainRunDir=$1
bamMapFile=$2
inputDir=${mainRunDir}"inputs/"
outputDir=${mainRunDir}"outputs/"
bamType=$3
javaPath=$4
gatkPath=$5
refFile=$6
exomeBedFile=$7
batchName=$8
outputbatchDir=${outputDir}${batchName}"/"
genelevelFile=$9
version=${10}
id=${11}
cancerType=${12}
toolDirName=${13}
while read j
do
mkdir -p ${outputbatchDir}${j}"/gene_level_by_case"
cd ${outputbatchDir}${j}
ls *.seg | while read file; do
sample=$(echo $file | cut -f1 -d'.')
case=$(echo $sample | grep -f - ${inputDir}${bamMapFile} | awk -F ' ' '{print $2}' | uniq )
echo $case
sed '1d' $file | cut -f2,3,4,6 | bedtools intersect -loj -a ${inputDir}"unique_genes.sorted.rmchr.bed" -b - | python ${mainRunDir}"gatk4wxscnv."${batchName}"/gene_segment_overlap.py" > ${outputbatchDir}${j}/gene_level_by_case/$case.gene_level_by_case.log2.seg
done
done<${mainRunDir}${toolDirName}"/"${cancerType}
while read j
do
cd ${outputbatchDir}${j}
cd gene_level_by_case
ls *.seg | cut -f1 -d'.' > samples.txt
genelevelOut=${genelevelFile}"."${j}"."${batchName}".v"${version}"."${id}".tsv"
echo gene > ${genelevelOut}
cut -f1 $(head -1 samples.txt).gene_level_by_case.log2.seg >> ${genelevelOut}
cat samples.txt | while read sample; do
echo ${sample}
echo $sample > smp
cut -f5 $sample.gene_level_by_case.log2.seg >> smp
paste ${genelevelOut} smp > tmp2
mv -f tmp2 ${genelevelOut}
rm -f smp
done
done<${mainRunDir}${toolDirName}"/"${cancerType}