-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathSomatic_amplicon_panel.sh
140 lines (123 loc) · 6.4 KB
/
Somatic_amplicon_panel.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/bin/sh
# Copyright (c) 2016-2024 Sentieon Inc. All rights reserved
# *******************************************
# Script to perform TNscope variant calling on
# PCR amplicon tumor sample with coverage over
# 200x and a matched normal sample.
# *******************************************
set -eu
# Update with the fullpath location of your sample fastq
TUMOR_SM="tumor_sample" #sample name
TUMOR_RGID="rg_$TUMOR_SM" #read group ID
NORMAL_SM="normal_sample" #sample name
NORMAL_RGID="rg_$NORMAL_SM" #read group ID
PL="ILLUMINA" #or other sequencing platform
FASTQ_FOLDER="/home/pipeline/samples"
TUMOR_FASTQ_1="$FASTQ_FOLDER/tumor_1.fastq.gz"
TUMOR_FASTQ_2="$FASTQ_FOLDER/tumor_2.fastq.gz" #If using Illumina paired data
NORMAL_FASTQ_1="$FASTQ_FOLDER/normal_1.fastq.gz"
NORMAL_FASTQ_2="$FASTQ_FOLDER/normal_2.fastq.gz" #If using Illumina paired data
# Update with the location of the reference data files
FASTA_DIR="/home/regression/references/b37"
FASTA="$FASTA_DIR/human_g1k_v37_decoy.fasta"
KNOWN_DBSNP="$FASTA_DIR/dbsnp_138.b37.vcf.gz"
INTERVAL_FILE="targeted_regions.bed"
# Update with the location of the Sentieon software package and license file
SENTIEON_INSTALL_DIR=/home/release/sentieon-genomics-|release_version|
TNSCOPE_FILTER=/home/sentieon-scripts/tnscope_filter/tnscope_filter.py
export SENTIEON_LICENSE=/home/Licenses/Sentieon.lic #or using licsrvr: c1n11.sentieon.com:5443
# Other settings
NT=$(nproc) #number of threads to use in computation, set to number of cores in the server
START_DIR="$PWD/test/TNscope" #Determine where the output files will be stored
# ************************************************************************************************************************************************************************
# ******************************************
# 0. Setup
# ******************************************
WORKDIR="$START_DIR"
mkdir -p $WORKDIR
LOGFILE=$WORKDIR/run.log
exec >$LOGFILE 2>&1
cd $WORKDIR
# ******************************************
# 1a. Mapping reads with BWA-MEM, sorting for the tumor sample
# ******************************************
#The results of this call are dependent on the number of threads used. To have number of threads independent results, add chunk size option -K 10000000
( $SENTIEON_INSTALL_DIR/bin/sentieon bwa mem -R "@RG\tID:$TUMOR_RGID\tSM:$TUMOR_SM\tPL:$PL" \
-t $NT -K 10000000 $FASTA $TUMOR_FASTQ_1 $TUMOR_FASTQ_2 || \
{ echo -n 'BWA error'; exit 1; } ) | \
$SENTIEON_INSTALL_DIR/bin/sentieon util sort -o tumor_sorted.bam -t $NT --sam2bam -i - || \
{ echo "Alignment1 failed"; exit 1; }
# ******************************************
# 1b. Mapping reads with BWA-MEM, sorting for the normal sample
# ******************************************
#The results of this call are dependent on the number of threads used. To have number of threads independent results, add chunk size option -K 10000000
( $SENTIEON_INSTALL_DIR/bin/sentieon bwa mem -R "@RG\tID:$NORMAL_RGID\tSM:$NORMAL_SM\tPL:$PL" \
-t $NT -K 10000000 $FASTA $NORMAL_FASTQ_1 $NORMAL_FASTQ_2 || \
{ echo -n 'BWA error'; exit 1; } ) | \
$SENTIEON_INSTALL_DIR/bin/sentieon util sort -o normal_sorted.bam -t $NT --sam2bam -i - || \
{ echo "Alignment2 failed"; exit 1; }
# ******************************************
# 2a. Metrics for the tumor sample
# ******************************************
$SENTIEON_INSTALL_DIR/bin/sentieon driver -r $FASTA -t $NT -i tumor_sorted.bam \
--algo MeanQualityByCycle tumor_mq_metrics.txt \
--algo QualDistribution tumor_qd_metrics.txt --algo GCBias \
--summary tumor_gc_summary.txt tumor_gc_metrics.txt --algo AlignmentStat \
--adapter_seq '' tumor_aln_metrics.txt \
--algo InsertSizeMetricAlgo tumor_is_metrics.txt \
--algo CoverageMetrics --omit_base_output tumor_coverage_metrics || \
{ echo "Metrics1 failed"; exit 1; }
$SENTIEON_INSTALL_DIR/bin/sentieon plot GCBias -o tumor_gc-report.pdf tumor_gc_metrics.txt
$SENTIEON_INSTALL_DIR/bin/sentieon plot QualDistribution \
-o tumor_qd-report.pdf tumor_qd_metrics.txt
$SENTIEON_INSTALL_DIR/bin/sentieon plot MeanQualityByCycle \
-o tumor_mq-report.pdf tumor_mq_metrics.txt
$SENTIEON_INSTALL_DIR/bin/sentieon plot InsertSizeMetricAlgo \
-o tumor_is-report.pdf tumor_is_metrics.txt
# ******************************************
# 2b. Metrics for the normal sample
# ******************************************
$SENTIEON_INSTALL_DIR/bin/sentieon driver -r $FASTA -t $NT -i normal_sorted.bam \
--algo MeanQualityByCycle normal_mq_metrics.txt \
--algo QualDistribution normal_qd_metrics.txt --algo GCBias \
--summary normal_gc_summary.txt normal_gc_metrics.txt --algo AlignmentStat \
--adapter_seq '' normal_aln_metrics.txt \
--algo InsertSizeMetricAlgo normal_is_metrics.txt \
--algo CoverageMetrics --omit_base_output normal_coverage_metrics || \
{ echo "Metrics2 failed"; exit 1; }
$SENTIEON_INSTALL_DIR/bin/sentieon plot GCBias -o normal_gc-report.pdf normal_gc_metrics.txt
$SENTIEON_INSTALL_DIR/bin/sentieon plot QualDistribution \
-o normal_qd-report.pdf normal_qd_metrics.txt
$SENTIEON_INSTALL_DIR/bin/sentieon plot MeanQualityByCycle \
-o normal_mq-report.pdf normal_mq_metrics.txt
$SENTIEON_INSTALL_DIR/bin/sentieon plot InsertSizeMetricAlgo \
-o normal_is-report.pdf normal_is_metrics.txt
# ******************************************
# 4. Somatic and Structural variant calling
# ******************************************
# Consider adding `--disable_detector sv` if not interested in SV calling
$SENTIEON_INSTALL_DIR/bin/sentieon driver -r $FASTA -t $NT \
${INTERVAL_FILE:+--interval_padding 10 --interval $INTERVAL_FILE} \
-i tumor_sorted.bam -i normal_sorted.bam \
--algo TNscope \
--tumor_sample $TUMOR_SM --normal_sample $NORMAL_SM \
--dbsnp $KNOWN_DBSNP \
--min_tumor_allele_frac 0.009 \
--max_fisher_pv_active 0.05 \
--max_normal_alt_cnt 10 \
--max_normal_alt_frac 0.01 \
--max_normal_alt_qsum 250 \
--sv_mask_ext 10 \
--prune_factor 0 \
--assemble_mode 2 \
output_tnscope.pre_filter.vcf.gz || \
{ echo "TNscope failed"; exit 1; }
# ******************************************
# 6. Variant filtration
# ******************************************
$SENTIEON_INSTALL_DIR/bin/sentieon pyexec $TNSCOPE_FILTER \
-v output_tnscope.pre_filter.vcf.gz \
--tumor_sample $TUMOR_SM \
-x amplicon --min_tumor_af 0.0095 --min_depth 200 \
output_tnscope.filter.vcf.gz || \
{ echo "TNscope filter failed"; exit 1; }