-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrc-non-ascii
executable file
·26 lines (21 loc) · 993 Bytes
/
rc-non-ascii
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env raku
# ID characters & counts, from file(s), showing anything not pure ASCII (i.e. .ords.any > 127)
## 2019-11-09
# croaks on malformed UTF-8, so helpful in tracking that down
# also useful for ID'ing Unicode that won't display properly
# 2022-12-18 segfault? bug in the volume of data piped
# 191.765u 0.711s 3:11.99 100.2% bare junction 'any', ouch
# 22.518u 0.411s 0:22.43 102.1% .sort.tail
# 4.214u 0.284s 0:04.05 110.8% just .ord (but then miss some combining chars)
# For every path given on the command line, tally each grapheme that contains
# at least one non-ASCII codepoint and print a per-file report, one line per
# distinct grapheme:  <grapheme>: <count> <Unicode name(s)>
# Arguments that are not plain files, and files with no non-ASCII graphemes,
# are skipped without output.
for @*ARGS -> $f {
    # One-statement alternative ('Bag' replaces the idiom '%codes{$_}++ for'):
    #next unless $f.IO ~~ :f and my %codes = $f.IO.slurp.comb.grep(*.ords.sort.tail > 127).Bag;
    # inefficient kludge for now
    next unless $f.IO ~~ :f;

    my %codes;
    # .comb splits the slurped text into graphemes, and a grapheme may span
    # several codepoints.  Test the LARGEST codepoint rather than only the
    # last one: checking just the tail misses a grapheme whose final codepoint
    # is ASCII but which starts with a non-ASCII one (e.g. a Prepend-class
    # character such as U+0600 followed by an ASCII digit).  .max gives the
    # same answer as '.ords.any > 127' without building a junction or sorting.
    for $f.IO.slurp.comb { %codes{$_}++ if .ords.max > 127; }

    # Nothing non-ASCII found: stay silent for this file.
    next unless %codes;

    say "\n$f";
    # .uninames lists the Unicode name of every codepoint in the grapheme.
    printf "%1s: %5d %s\n", $_, %codes{$_}, .uninames.join(', ') for %codes.keys.sort;

    # Report any exception raised while processing this file (for instance
    # malformed UTF-8 when slurping) together with the file name.
    CATCH { default { say qq{Died in "$f" with "$_" } } }
}