-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMapAndCountUMIs_general.sh
executable file
·67 lines (53 loc) · 3.5 KB
/
MapAndCountUMIs_general.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
# David Zemmour
# usage: /groups/cbdm_lab/dp133/scripts/allon_scripts/MapAndCountUMIs.sh $path_to_the_folder_containing_fastq_file $prefix
cd $1
prefix=$2
module load seq/fastx/0.0.13
module load seq/tophat/2.0.10
module load seq/bowtie/2.1.0
module load seq/samtools/0.1.19
#module load seq/htseq/0.6.1
module load seq/BEDtools/2.23.0
module load stats/R/3.2.1
echo "Running tophat..."
mkdir thoutfs
#to make the transcriptome reference: tophat -p 2 -G genes.gtf --transcriptome-index=/groups/cbdm_lab/dp133/genomes/hg19/transcriptome /groups/cbdm_lab/dp133/genomes/hg19/genome
# Comments: hg19/transcriptome (human) and NOD_custom_mm10/known_mm10 (mouse) -variable[1], hg19/genome (human) NOD_custom_mm10/genome (mouse)-variable[2] for human,
tophat -p 4 --library-type fr-firststrand --read-mismatches 5 --read-gap-length 5 --read-edit-dist 5 --no-coverage-search --segment-length 15 --transcriptome-index /groups/immdiv-bioinfo/evergrande/yael/genomes/*****Varilabe[1] -o ./thoutfs /groups/immdiv-bioinfo/evergrande/yael/genomes/****Variable[2] $prefix.fq
#tophat2 -p 2 --library-type fr-firststrand --read-mismatches 5 --read-gap-length 5 --read-edit-dist 5 --no-coverage-search --segment-length 15 --transcriptome-index /groups/cbdm_lab/dp133/genomes/hg19/transcriptome -o ./thoutfs /groups/cbdm_lab/dp133/genomes/hg19/genome $prefix.R1
#tophat -p 4 --library-type fr-firststrand --read-mismatches 5 --read-gap-length 5 --read-edit-dist 5 --no-coverage-search --segment-length 15 --transcriptome-index /groups/cbdm_lab/dp133/NOD_custom_mm10/known_mm10 -o ./thoutfs /groups/cbdm_lab/dp133/NOD_custom_mm10/genome reads_95_89.fq
cd thoutfs
echo "Removing multiple alignments"
samtools view -b -F 256 accepted_hits.bam > accepted_hits.uniqalign.bam
#Make bed file with exons
#cut -f 1,4,5 known_mm10_exons.gff > 1
#cut -f 9 known_mm10_exons.gff | cut -d ' ' -f 2 | cut -d '"' -f 2 > 2
#cut -f 6,7 known_mm10_exons.gff > 3
#paste 1 2 3 > known_mm10_exons.bed
# for human i used ../../genomes/hg19/biomart_hg19_genes.gff #andrew
# Comments: Choose hg19_genes.bed for human, known_mm10_exons.bed for mouse - ****Variable
echo "Annotating bam file with gene name..."
tagBam -s -i accepted_hits.uniqalign.bam -files /groups/immdiv-bioinfo/evergrande/copy_work/yael/liRNAseq_newdpzpipeline/****Variable -names > accepted_hits.uniqalign.genenames.bam
#samtools view test.bam | grep YB | wc -l
mv accepted_hits.uniqalign.genenames.bam $prefix.uniqalign.genenames.bam
echo "Sorting and indexing bam file"
samtools sort $prefix.uniqalign.genenames.bam $prefix.uniqalign.genenames.sorted
samtools index $prefix.uniqalign.genenames.sorted.bam
echo "UMI correction: writing correcting bam file and count file..."
Rscript /groups/immdiv-bioinfo/evergrande/copy_work/yael/liRNAseq_newdpzpipeline/CorrectAndCountUMIs.R exons $prefix.uniqalign.genenames.sorted.bam
echo "Stats..."
echo $prefix > names.txt
#grep 'Input' *align* | cut -d ':' -f 1 | cut -d '.' -f 1 > names.txt
grep 'Input' *align* | cut -d ':' -f 3 > reads.txt
grep 'Mapped' *align* | cut -d ':' -f 3 | cut -d '(' -f 1 > mapped.txt
grep 'of these' *align* | cut -d ':' -f 3 | cut -d '(' -f 1 > multialigned.txt
samtools view *umicorrected.bam | wc -l > umis.txt
echo 'SAMPLE READS MAPPED MULTIALIGN UMIS' > $prefix.mapping_rates.txt
paste names.txt reads.txt mapped.txt multialigned.txt umis.txt >> $prefix.mapping_rates.txt
rm names.txt reads.txt mapped.txt multialigned.txt umis.txt
echo "Removing files..."
#rm *genenames*
#rm accepted_hits.uniqalign.bam
echo "Copying files..."
cp $prefix* ../../