-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_popinfo_snplist.pl
91 lines (64 loc) · 2.06 KB
/
get_popinfo_snplist.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!perl
#
# Description: Get all variants within certain coordinates from a popinfo file.
#
#
#
# Created by Jessica X Chong on 2012-06-19
use strict;
use warnings;
use Getopt::Long;
my ($popinfodir, $popinfofile, $inputfile, $outputfile);
GetOptions(
'popinfodir=s' => \$popinfodir,
'in=s' => \$inputfile,
'out=s' => \$outputfile,
);
if (!defined $popinfodir) {
optionUsage("option --popinfodir not defined\n");
} elsif (!defined $outputfile) {
optionUsage("option --out not defined\n");
} elsif (!defined $inputfile) {
optionUsage("options --in not defined\n");
}
$popinfofile = $popinfodir;
$popinfofile =~ s/popinfo-/all./;
$popinfofile = "$popinfofile.popinfo.tsv.gz";
print STDOUT "Note: These results (genotype counts and frequencies) use data from only 96 Hutterite genomes (not from 98) because including 2 affected children biases the MAF calculations\n\n";
# get header
open (POPINFOFILE, "zcat $popinfodir/$popinfofile |") or die "Cannot read popinfo from $popinfodir/$popinfofile: $!\n";
my $header = <POPINFOFILE>;
$header =~ s/\s+$//; # Remove line endings
close POPINFOFILE;
open (OUT, ">$outputfile") or die "Cannot write to $outputfile: $!.\n";
print OUT "$header\n";
open (INPUT, "$inputfile") or die "Cannot read $inputfile file.\n";
while ( <INPUT> ) {
$_ =~ s/\s+$//; # Remove line endings
my ($chr, $start, $stop) = split("\t", $_);
if ($chr !~ 'chr') {
$chr = "chr$chr";
}
$start =~ s/,//g;
$start -= 1;
$stop =~ s/,//g;
print STDOUT "Fetching variants from chromosome $chr:$start-$stop\n";
my $matchedvariants = 0;
my @variants = `tabix $popinfodir/$popinfofile $chr:$start-$stop`;
foreach (@variants) {
print OUT "$_";
$matchedvariants++;
}
print STDOUT "Found $matchedvariants variants between coordinates $chr:$start-$stop\n";
}
close INPUT;
close OUT;
sub optionUsage {
my $errorString = $_[0];
print "$errorString";
print "perl $0 \n";
print "\t--popinfo\tpopinfo file\n";
print "\t--out\toutput file\n";
print "\t--in\tinput file, tab-delimited, coordinates assumed to be 1-based: chr, start, stop\n";
die;
}