-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget.QCmetrics.from.vcf.sh
57 lines (30 loc) · 1.33 KB
/
get.QCmetrics.from.vcf.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/bash
# This script will extract important metrics that will help you define your cutoffs.
# By Errbii M (added 18.08.21)
#
# Usage:
#   Run the script and answer the three prompts on stdin:
#     1. name of the gzipped VCF file (*.vcf.gz)
#     2. window size   (read but currently not used by any calculation)
#     3. window step   (read but currently not used by any calculation)
#
# Output:
#   A directory ./<base>_QC is created (if missing), where <base> is the VCF
#   file name without its directory and without the ".vcf.gz" suffix. Each
#   vcftools run is given ./<base>_QC/<base>.<metric> as its --out prefix.
#
# Exit status:
#   0 if every vcftools run succeeded; 1 if the input is missing/invalid,
#   vcftools is not installed, the output directory cannot be created, or at
#   least one vcftools run failed.

# -u: fail on unset variables; pipefail: a pipeline fails if any stage fails.
# -e is deliberately not set: a failing metric must not prevent the remaining
# metrics from being calculated (failures are counted and reported at the end).
set -uo pipefail

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#get all variables you need..
vcf=
echo "Type the name of the vcf file you are working on now, followed by [ENTER]:"
read -r vcf
# The window size/step prompts are kept so that answers piped in by existing
# wrappers stay aligned, even though no calculation below uses the values.
# shellcheck disable=SC2034
{
  echo "Type the window size here, followed by [ENTER]:"
  read -r windsize
  echo "Type the window step here, followed by [ENTER]:"
  read -r windstep
}

# Validate the input up front instead of letting every vcftools run fail.
[[ -n "$vcf" ]] || die "error: no vcf file name was given"
[[ -f "$vcf" ]] || die "error: vcf file not found: $vcf"
command -v vcftools >/dev/null || die "error: vcftools is not installed or not in PATH"

base=$(basename -- "$vcf" .vcf.gz)
echo "$base"
out="${base}_QC"
# -p: re-running the script on the same VCF must not fail on the existing dir.
mkdir -p -- "$out" || die "error: cannot create output directory: $out"

# Number of vcftools runs that returned a non-zero status.
failed=0

# Run vcftools on the input VCF with the given options.
# Arguments: $1 - suffix appended to the --out prefix (./$out/$base.<suffix>)
#            $2.. - vcftools options selecting the metric
# Globals:   vcf, out, base (read); failed (incremented on failure)
run_vcftools() {
  local suffix=$1
  shift
  # Called directly: wrapping the call in a bare $( ... ) would make the shell
  # execute whatever vcftools prints on stdout as a command.
  if ! vcftools --gzvcf "$vcf" "$@" --out "./$out/$base.$suffix"; then
    printf 'error: vcftools %s failed for %s\n' "$*" "$vcf" >&2
    failed=$((failed + 1))
  fi
}

#no_snps=$(bcftools view $vcf|grep -v "#"|wc -l)
#echo "This file contains $no_snps"
echo "calculating allele frequency and count..."
#freq
run_vcftools freq --freq2
#count
run_vcftools count --counts2
echo "calculating site mean depth..."
run_vcftools site.depth --site-mean-depth
echo "calculating the missingness on a per-individual basis...."
run_vcftools missing.indv --missing-indv
echo "calculating the missingness on a per-site basis..."
run_vcftools missing.site --missing-site
echo "calculating the number and density of SNPs in bins of 10kb..."
# Bin width is fixed at 10000 bp to match the message above.
run_vcftools density --SNPdensity 10000

# Only announce success when every metric was actually produced.
if ((failed > 0)); then
  die "error: $failed vcftools run(s) failed; see the messages above"
fi
echo "done with calculating 5 metrics! Now got to R and visualize the data"