-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMobiBedAnnotator.sh
93 lines (78 loc) · 2.25 KB
/
MobiBedAnnotator.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/bin/bash
#title :MobiBedAnnotator.sh
#description :This script will annotate a BED file with refSeq NM and exons positions
#author : Henri Pégeot & David Baux
#date (dd/mm/yyyy) :
#notes :
#bash_version :bash-3.2$-release
#==============================================================================
# Help text printed by -h/--help and before every usage error.
# The option spellings listed here are the ones the option parser accepts
# (-o is spelled --output-bed), and the example uses bash because the script
# relies on [[ ]] tests.
USAGE="
---
MobiBedAnnotator.sh: script to annotate a BED file for NGS experiments. requires bedtools to be installed
---
bash MobiBedAnnotator.sh -r /path/to/ROI.bed -m /path/to/master.bed -o /path/to/output.bed
=======
Arguments:
-r, --roi-bed: Your ROI BED file to be annotated. Must contain 4 columns.
-o, --output-bed: Desired name for the annotated output bed
Options:
-m, --master-bed: Path to the provided master.bed file which contains annotations for all coding HGNC genes (05/2018). Default: master.bed in the current working directory.
-b, --bedtools-path: Path to bedtools executables. Mandatory if bedtools not in path, optional otherwise
-h, --help: Print this help message and exit
"
# Called without any argument: show the usage text followed by an explicit
# error line and a blank line, then stop with a failure status.
case "$#" in
  0)
    printf '%s\n' "${USAGE}"
    printf '%s\n' "Error Message : No arguments provided" ""
    exit 1
    ;;
esac
# Defaults, overridable from the command line: the master BED file is looked
# up in the current directory and bedtools is resolved from PATH (BEDTOOLS
# stays empty when it is not installed there).
MASTER_PATH=master.bed
BEDTOOLS=$(command -v bedtools)

# _require_value OPTION [VALUE...]: exit 1 with a message on stderr when
# OPTION is the last word of the command line, i.e. no value follows it.
_require_value() {
  if [[ "$#" -lt 2 ]]; then
    echo "Error Message : Option $1 requires a value" >&2
    exit 1
  fi
}

# parse_args ARGS...: store the command-line options in global variables.
#   -r, --roi-bed                 -> ROI_PATH
#   -m, --master-bed              -> MASTER_PATH
#   -b, --bedtools-path           -> BEDTOOLS
#   -o, --output-bed, --output    -> OUTPUT
#   -h, --help                    -> print USAGE and exit (status 1, as before)
# Any other word is rejected with a message on stderr and exit status 1.
parse_args() {
  local key
  while [[ "$#" -gt 0 ]]; do
    key="$1"
    case "${key}" in
      -r|--roi-bed)
        _require_value "$@"
        ROI_PATH="$2"
        shift
        ;;
      -m|--master-bed)
        _require_value "$@"
        MASTER_PATH="$2"
        shift
        ;;
      -b|--bedtools-path)
        _require_value "$@"
        BEDTOOLS="$2"
        shift
        ;;
      # --output is the spelling shown in the help text, --output-bed the one
      # historically accepted here; both are supported.
      -o|--output|--output-bed)
        _require_value "$@"
        OUTPUT="$2"
        shift
        ;;
      -h|--help)
        echo "${USAGE}"
        exit 1
        ;;
      *)
        # A bare `exit` would return the status of the echo (0) and make a
        # usage error look like success to the caller.
        echo "Error Message : Unknown option ${key}" >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Annotation step: intersect the ROI BED file with the master BED file and
# write one merged BED file to OUTPUT.
#   1. ROI records overlapping the master file are reduced to chrom, start,
#      end plus the last column of the matching master record, then sorted
#      and merged with column 4 collapsed.
#   2. ROI records with no overlap are reduced to chrom, start, end plus
#      their own last column.
#   3. Both sets are concatenated, sorted and merged once more into OUTPUT.
# Reads the globals BEDTOOLS, ROI_PATH, MASTER_PATH, OUTPUT and USAGE.

# usage_error MESSAGE: print the usage text and MESSAGE on stderr, exit 1.
usage_error() {
  {
    echo "${USAGE}"
    echo "Error Message : $1"
  } >&2
  exit 1
}

# die MESSAGE: report a failed processing step on stderr and exit 1.
die() {
  echo "Error Message : $1" >&2
  exit 1
}

# sort_and_merge IN OUT: sort the BED file IN by chromosome then start and
# write the bedtools-merged intervals (column 4 collapsed) to OUT.
sort_and_merge() {
  local sorted="${WORK_DIR}/sorted.bed"
  sort -k1,1 -k2,2n "$1" > "${sorted}" || return 1
  if [[ -s "${sorted}" ]]; then
    "${BEDTOOLS}" merge -i "${sorted}" -c 4 -o collapse > "$2"
  else
    # Nothing to merge (e.g. no ROI overlaps the master file): produce an
    # empty result instead of relying on how bedtools treats empty input.
    : > "$2"
  fi
}

# -b is documented as the path to the bedtools executables; accept the
# containing directory as well as the binary itself.
if [[ -d "${BEDTOOLS}" ]]; then
  BEDTOOLS="${BEDTOOLS%/}/bedtools"
fi

# Check each prerequisite separately so the message names the culprit.
[[ -n "${BEDTOOLS}" && -f "${BEDTOOLS}" && -x "${BEDTOOLS}" ]] \
  || usage_error "bedtools executable not found: '${BEDTOOLS}' (see -b)"
[[ -n "${ROI_PATH}" && -r "${ROI_PATH}" && ! -d "${ROI_PATH}" ]] \
  || usage_error "ROI BED file not readable: '${ROI_PATH}' (see -r)"
[[ -n "${MASTER_PATH}" && -r "${MASTER_PATH}" && ! -d "${MASTER_PATH}" ]] \
  || usage_error "master BED file not readable: '${MASTER_PATH}' (see -m)"
[[ -n "${OUTPUT}" ]] \
  || usage_error "no output file given (see -o)"

# Intermediate files live in a private directory that is removed on every
# exit path, so concurrent runs cannot collide and failures leave no debris.
WORK_DIR=$(mktemp -d "${TMPDIR:-/tmp}/MobiBedAnnotator.XXXXXX") \
  || die "cannot create a temporary directory"
trap 'rm -rf -- "${WORK_DIR}"' EXIT

# Make a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail

# Drop comment lines once so that both intersections see the same records.
sed '/^#/ d' "${ROI_PATH}" > "${WORK_DIR}/roi.bed" \
  || die "cannot read ${ROI_PATH}"
[[ -s "${WORK_DIR}/roi.bed" ]] \
  || die "${ROI_PATH} contains no BED record"

# ROI records that overlap the master file, named after the master record.
"${BEDTOOLS}" intersect -a "${WORK_DIR}/roi.bed" -b "${MASTER_PATH}" -wa -wb \
  | awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $NF }' \
  > "${WORK_DIR}/annotated.bed" \
  || die "bedtools intersect failed while annotating ${ROI_PATH}"
sort_and_merge "${WORK_DIR}/annotated.bed" "${WORK_DIR}/annotated_merged.bed" \
  || die "bedtools merge failed on the annotated regions"

# ROI records without any overlap (-v) keep their own last column.
"${BEDTOOLS}" intersect -a "${WORK_DIR}/roi.bed" -b "${MASTER_PATH}" -v \
  | awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $NF }' \
  > "${WORK_DIR}/not_annotated.bed" \
  || die "bedtools intersect failed while collecting unannotated regions"

cat "${WORK_DIR}/annotated_merged.bed" "${WORK_DIR}/not_annotated.bed" \
  > "${WORK_DIR}/combined.bed" \
  || die "cannot combine the intermediate BED files"
sort_and_merge "${WORK_DIR}/combined.bed" "${WORK_DIR}/final.bed" \
  || die "bedtools merge failed on the combined regions"

# Only touch OUTPUT once the whole result is available.
cat "${WORK_DIR}/final.bed" > "${OUTPUT}" \
  || die "cannot write ${OUTPUT}"