-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathhistbin
executable file
·70 lines (63 loc) · 1.86 KB
/
histbin
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/awk -f
#
# Copyright (c) 2016 Douglas G. Scofield, Uppsala University
#
# This code is covered by the GNU GPLv2; please see LICENSE.
# Bugs and suggestions to https://github.com/douglasgscofield/tinyutils/issues.
#
# Calculates a histogram of input values placed into integer bins of a given size.
# Bins are half-open intervals [i, i+bin), and bin boundaries are multiples of bin.
#
#
# CHANGELOG
# 2016-03-29 : create the script
BEGIN {
    # Tab-separated fields on input and on output.
    FS = OFS = "\t"

    # Settings read by the per-record rule and the END rule.
    col          = 1     # 1-based input column that holds the values to bin
    bin          = 10    # width of each bin
    header       = 0     # number of leading input lines to skip as a header (0 = none)
    skip_comment = 1     # nonzero: ignore input lines that start with '#'
    print_header = 0     # nonzero: print a "bin", "count" header line before the histogram
    drop_zero    = 0     # nonzero: leave bins with a zero count out of the output

    # Running extremes of the bin values seen so far.  The sentinels assume
    # every binned value lies strictly between -1e20 and 1e20.
    the_max = -1.0e+20
    the_min = 1.0e+20
}
# floor_to_bin(v): return the lower boundary of the bin that contains v.
# For a positive global bin size `bin`, that is the largest multiple of
# `bin` that is <= v.
#   v        value to place in a bin
#   ans, vv  local scratch variables; callers pass only v
function floor_to_bin(v,    ans, vv) {
    vv = v / bin;
    ans = int(vv);
    # int() truncates toward zero, so a negative non-integer quotient
    # lands one step too high and must be moved down to get a true floor.
    if (ans > vv)
        ans -= 1;
    return ans * bin;
}
# Per-record rule: bin the value found in column `col`, count it in a[],
# and track the smallest and largest bin seen so far.
{
    # Skip the first `header` input lines when a header is declared.
    if (header && NR <= header) next;
    # Skip lines beginning with '#' when comment skipping is enabled.
    if (skip_comment && $0 ~ /^#/) next;
    bin = bin + 0.0;   # force the bin size to be numeric
    if (NF < col) {
        print "on input line " NR " missing requested column" > "/dev/stderr";
        next;
    }
    val = floor_to_bin($(col) + 0.0);
    # An unset a[val] counts as 0, so this both creates and increments.
    a[val] += 1.0;
    # The two extremes are tested independently: a value that raises the
    # maximum can also be the minimum (the first value always is, and so
    # is every value of an ascending input).  Chaining these with `else`
    # left the_min at its sentinel and produced no output in END.
    if (val > the_max)
        the_max = val;
    if (val < the_min)
        the_min = val;
}
# END rule: print one "bin<OFS>count" line for each bin from the_min up to
# the_max in steps of `bin`, optionally preceded by a header line.  Bins
# in that range with no entries are printed with a count of 0 unless
# drop_zero is set.
END {
    # With a non-positive (or non-numeric) bin size the loop below would
    # never advance past the_max; report it instead of looping forever.
    # The check only applies when there is a non-empty range to print.
    if (the_min <= the_max && bin + 0 <= 0) {
        print "bin size must be positive, got " bin > "/dev/stderr";
        exit 1;
    }
    if (print_header)
        print "bin", "count";
    for (i = the_min; i <= the_max; i += bin) {
        # Use `in` so that empty bins are not created in a[] as a side effect.
        cnt = (i in a) ? a[i] : 0.0;
        if (drop_zero && cnt == 0.0) continue;
        printf("%.0f%s%.0f\n", i, OFS, cnt);
    }
}