-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormalize_numbers.py
89 lines (74 loc) · 2.34 KB
/
normalize_numbers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from spell_number import get_is_number
import sys
def read_file(fname):
    """Yield the lines of a file, or of standard input, one at a time.

    Args:
        fname: Path of the file to read, or "-" to read from standard input.

    Yields:
        Each line exactly as the underlying file object produces it, i.e.
        with its trailing newline still attached.
    """
    if fname == "-":
        # stdin belongs to the interpreter, so it is deliberately left open.
        for line in sys.stdin:
            yield line
        return
    # The context manager closes the file as soon as the generator is
    # exhausted or closed, instead of leaking the handle.
    with open(fname) as f:
        for line in f:
            yield line
def split_phone_number(n):
    """Yield space-separated groupings of a seven-digit phone number.

    Each grouping places the first seven characters of the cleaned input
    into one of a fixed set of layouts (e.g. "5 55 1234", "55 51 234").
    Groupings in which a multi-character group starts with "0" are skipped.

    Args:
        n: The phone number as text. Whitespace (including a trailing
            newline) and "-" separators are ignored.

    Yields:
        One string per accepted layout, groups separated by single spaces.
        Nothing is yielded when fewer than seven characters remain after
        cleaning (e.g. for a blank line).
    """
    # Remove all whitespace, not only spaces, so lines read straight from a
    # file work; "-" is dropped as well, matching split_kennitala.
    digits = "".join(n.split()).replace("-", "")
    if len(digits) < 7:
        # Too short to fill the seven placeholders; str.format would raise
        # IndexError, so skip the entry instead.
        return
    # Every layout has exactly seven placeholders. The list previously held
    # "{} {} {} {}{} {} {}" twice, which emitted that grouping twice.
    patterns = (
        "{} {} {} {} {} {} {}",
        "{} {}{} {}{}{}{}",
        "{} {}{} {}{} {}{}",
        "{} {} {} {}{} {} {}",
        "{}{} {}{} {} {} {}",
        "{}{} {}{} {}{}{}",
        "{} {}{} {} {} {} {}",
        "{} {} {} {} {} {}{}",
    )
    for pattern in patterns:
        # Characters beyond the seventh are ignored by str.format.
        split = pattern.format(*digits)
        # A group such as "01" would lose its leading zero once converted
        # to an int downstream, so such groupings are rejected.
        if not any(len(s) > 1 and s[0] == "0" for s in split.split(" ")):
            yield split
def split_kennitala(n):
    """Yield space-separated groupings of a ten-digit kennitala.

    Each grouping places the first ten characters of the cleaned input into
    one of a fixed set of layouts (e.g. "12 0 3 45 67 89"). Groupings in
    which a multi-character group starts with "0" are skipped.

    Args:
        n: The kennitala as text. "-" separators and whitespace (including
            a trailing newline) are ignored.

    Yields:
        One string per accepted layout, groups separated by single spaces.
        Nothing is yielded when fewer than ten characters remain after
        cleaning (e.g. for a blank line).
    """
    # Strip every whitespace character as well as "-" so that lines read
    # straight from a file, or written with inner spaces, are handled.
    digits = "".join(n.split()).replace("-", "")
    if len(digits) < 10:
        # Too short to fill the ten placeholders; str.format would raise
        # IndexError, so skip the entry instead.
        return
    # Every layout has exactly ten placeholders.
    patterns = (
        "{} {} {} {} {} {} {} {} {} {}",
        "{}{} {}{} {}{} {}{} {}{}",
        "{}{} {}{} {}{} {}{} {} {}",
        "{}{} {}{} {}{} {} {} {}{}",
        "{}{} {}{} {}{} {} {} {} {}",
        "{}{} {}{} {} {} {} {} {} {}",
        "{}{} {}{} {} {} {} {} {}{}",
        "{}{} {}{} {} {} {}{} {} {}",
        "{}{} {}{} {} {} {}{} {}{}",
        "{} {} {}{} {} {} {}{} {}{}",
        "{}{} {} {} {}{} {}{} {}{}",
    )
    for pattern in patterns:
        # Characters beyond the tenth are ignored by str.format.
        split = pattern.format(*digits)
        # A group such as "03" would lose its leading zero once converted
        # to an int downstream, so such groupings are rejected.
        if not any(len(s) > 1 and s[0] == "0" for s in split.split(" ")):
            yield split
def phone_nubmer(number):
    """Spell out each group of an already-split phone number.

    Args:
        number: Groups of digits separated by single spaces, as produced by
            split_phone_number. Surrounding whitespace is ignored.

    Returns:
        The results of get_is_number for every group, joined by spaces.
    """
    spoken = []
    for chunk in number.strip().split(" "):
        # Each group is converted to an int and passed with gender "kk"
        # (presumably the masculine form -- confirm against spell_number).
        spoken.append(get_is_number(int(chunk), gender="kk"))
    return " ".join(spoken)
def kennitala(number):
    """Spell out each group of an already-split kennitala.

    Args:
        number: Groups of digits separated by single spaces, as produced by
            split_kennitala. Surrounding whitespace is ignored.

    Returns:
        The results of get_is_number for every group, joined by spaces.
    """
    groups = number.strip().split(" ")
    # Each group is converted to an int and passed with gender "kk"
    # (presumably the masculine form -- confirm against spell_number).
    words = (get_is_number(int(group), gender="kk") for group in groups)
    return " ".join(words)
def print_help():
    """Print command-line usage information to standard output."""
    # The example file name used to read "phone_numbes.txt" (typo), and the
    # "-" convention for reading standard input was not mentioned.
    print(
        "\n"
        "Usage: python normalize_numbers.py <entity:str> <in_file:str>\n"
        "E.g.: python normalize_numbers.py phone_number phone_numbers.txt\n"
        "Normalize numerical data. Supported entities: [kennitala, phone_number]\n"
        "Pass - as <in_file> to read from standard input.\n"
    )
if __name__ == "__main__":
    # Command-line entry point: normalize_numbers.py <entity> <in_file>
    try:
        entity = sys.argv[1]
        fname = sys.argv[2]
    except IndexError:
        # Too few arguments. Only IndexError can occur here, so nothing
        # broader is caught; a usage error exits with a non-zero status.
        print_help()
        sys.exit(2)
    if entity == "kennitala":
        for n in read_file(fname):
            for s in split_kennitala(n):
                print(kennitala(s))
    elif entity == "phone_number":
        for n in read_file(fname):
            for s in split_phone_number(n):
                print(phone_nubmer(s))
    else:
        # Unknown entity: show usage and signal the error to the caller.
        print_help()
        sys.exit(2)