forked from valflanza/ResCap
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocSam.pl
executable file
·118 lines (97 loc) · 2.1 KB
/
procSam.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/perl
################################################
#
# procSam.pl counts, for every gene, the number of mapped reads, the reads per
# kilobase, the number of unequivocally mapped reads and the horizontal coverage
# of the alignment. The output is a tab-separated table with those fields as
# columns and the genes found as rows.
# procSam.pl is used by procSamples.pl in the standard pipeline.
#
# Usage (the read table is the first argument, the gene-length file the second):
# ./procSam.pl OutputProcSamplesfile.drs fileWithGeneLengths.txt > sampleCounts.cvs
#
#
#
################################################
# Load the gene-length table named by the second command-line argument.
# Every line is expected to hold two whitespace-separated fields: the gene
# length followed by the gene identifier. The table is stored in the global
# hash %longitud (gene identifier => length), which the coverage and
# abundance steps below read.
die "Usage: $0 readsTable.drs geneLengths.txt > sampleCounts.cvs\n"
    unless @ARGV >= 2;
open(my $lengths_fh, '<', $ARGV[1])
    or die "$0: cannot open gene-length file '$ARGV[1]': $!\n";
while (my $length_line = <$lengths_fh>)
{
    chomp $length_line;
    # split(' ', ...) ignores leading whitespace and splits on runs of it.
    my ($gene_length, $gene) = split(' ', $length_line);
    # Skip blank or single-field lines instead of storing an entry under
    # an empty gene name.
    next unless defined $gene;
    $longitud{$gene} = $gene_length;
}
close $lengths_fh;
# Tally the read table named by the first command-line argument.
# Each line is tab-separated; the fields used here are the read identifier
# (column 0), the gene the read maps to (column 1) and two alignment
# positions (columns 2 and 4 -- presumably read start and mate start;
# TODO confirm against the format written by procSamples.pl).
#
# Results are left in three global hashes that the steps below consume:
#   %counts          gene => number of table lines assigned to the gene
#   %countsSpecific  gene => number of those lines whose read identifier
#                    differs from the identifier on both neighbouring lines
#                    (this singles out reads listed only once, provided all
#                    lines of one read are adjacent in the table)
#   %position        gene => list of the recorded alignment positions
#
# The table is streamed with one line of look-ahead, so every line --
# including the first and the last -- is counted and has its positions
# recorded exactly once.
open(my $reads_fh, '<', $ARGV[0])
    or die "$0: cannot open read table '$ARGV[0]': $!\n";
my $prev_id;                          # undef until a line has been handled
my $read_line = <$reads_fh>;
while (defined $read_line)
{
    my $next_line = <$reads_fh>;      # undef once the last line is reached
    chomp $read_line;
    my @fields = split(/\t/, $read_line);

    # Only the identifier of the following line is needed for the
    # comparison; it may carry the newline when the line has one field.
    my $next_id;
    if (defined $next_line)
    {
        ($next_id) = split(/\t/, $next_line);
        chomp $next_id if defined $next_id;
    }

    my ($read_id, $gene) = @fields;
    if (defined $gene)                # ignore blank / single-field lines
    {
        $counts{$gene}++;
        my $shares_id = (defined $prev_id && $read_id eq $prev_id)
                     || (defined $next_id && $read_id eq $next_id);
        $countsSpecific{$gene}++ unless $shares_id;
        foreach my $column (2, 4)
        {
            push(@{$position{$gene}}, $fields[$column])
                if defined $fields[$column];
        }
    }

    $prev_id   = $read_id;
    $read_line = $next_line;
}
close $reads_fh;
# covered_fraction($gene_length, \@starts, $span)
#
# Returns the fraction (0..1) of a gene's bases that are touched by at least
# one alignment.
#   $gene_length  length of the gene; a missing, non-numeric or non-positive
#                 value yields 0 instead of a division by zero
#   \@starts      alignment positions, used directly as 0-based indexes into
#                 the gene; entries that are not non-negative integers are
#                 ignored
#   $span         number of bases marked after each position, so a position
#                 p marks p .. p+$span, clipped at the last index of the
#                 gene. Defaults to 100 (presumably the read length -- TODO
#                 confirm).
sub covered_fraction
{
    my ($gene_length, $starts, $span) = @_;
    $span = 100 unless defined $span;
    return 0 unless $gene_length && $gene_length > 0;

    my @covered = (0) x $gene_length;
    foreach my $start (@$starts)
    {
        # Surrounding whitespace is tolerated because a position taken from
        # the last column of a line may still carry its newline.
        next unless defined $start && $start =~ /\A\s*(\d+)\s*\z/;
        my $first = $1 + 0;
        my $last  = $first + $span;
        $last = $gene_length - 1 if $last > $gene_length - 1;
        # An empty range (position beyond the gene end) marks nothing.
        $covered[$_] = 1 foreach $first .. $last;
    }
    my $covered_bases = grep { $_ } @covered;
    return $covered_bases / $gene_length;
}

# Horizontal coverage for every gene that has recorded positions in
# %position. Genes absent from the length table %longitud get a coverage
# of 0. The result is stored in the global hash %coverage, which the
# output step below prints.
foreach my $gene (keys(%position))
{
    $coverage{$gene} = exists($longitud{$gene})
        ? covered_fraction($longitud{$gene}, $position{$gene})
        : 0;
}
# gene_row($gene, $reads, $gene_length, $specific, $covered)
#
# Builds one tab-separated, newline-terminated output row:
#   gene, read count, reads per kilobase, specific read count, coverage
# Reads per kilobase is computed as reads / (length + 1) * 1000; the +1
# keeps the division defined for genes without a known length, which are
# treated as length 0. An undefined specific count or coverage is printed
# as 0 so that every row has the same number of columns.
sub gene_row
{
    my ($gene, $reads, $gene_length, $specific, $covered) = @_;
    $gene_length = 0 unless defined $gene_length;
    $specific    = 0 unless defined $specific;
    $covered     = 0 unless defined $covered;
    my $abun_rel = ($reads / ($gene_length + 1)) * 1000;
    return join("\t", $gene, $reads, $abun_rel, $specific, $covered) . "\n";
}

# Print the result table to STDOUT, one row per gene present in %counts.
# Rows are sorted by gene name so that the output is the same on every run.
foreach my $gene (sort keys(%counts))
{
    print gene_row($gene, $counts{$gene}, $longitud{$gene},
                   $countsSpecific{$gene}, $coverage{$gene});
}