forked from OpenPecha/ie-datasets
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfnr.py
44 lines (35 loc) · 1.23 KB
/
fnr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import re
# Path to the rules file: one find/replace expression per line, with the
# regex pattern and its replacement separated by "\t-\t" (tab, hyphen, tab).
fnrExpressions = "human2rdr.txt"
# Directory whose files (including those in its sub-directories) get the
# find/replace expressions applied.
fnrDir = "humanrdr"
# Prefix prepended to each output file name, with the following syntax: _foo_
# Input files whose name already starts with "_" are skipped.
newPrefix = "_"
# Optional path to a directory for output; empty by default.
outDir = ''
def getRegex(fnrExpressions):
    """Load the find/replace pairs stored in a rules file.

    Every rule sits on its own line: a regex pattern, then a tab, a hyphen
    and a tab, then the replacement. Only the first separator on a line is
    used for splitting, so the replacement may itself contain the separator.
    Blank (or whitespace-only) lines are ignored.

    Args:
        fnrExpressions: path of the UTF-8 encoded rules file.

    Returns:
        A list of ``[find, replace]`` two-item lists, in file order.

    Raises:
        ValueError: if a non-blank line does not contain the separator.
        OSError: if the file cannot be opened.
    """
    separator = "\t-\t"
    regexList = []
    with open(fnrExpressions, 'r', encoding='utf8') as fp:
        for lineno, line in enumerate(fp, start=1):
            # Only line terminators are removed: leading/trailing spaces can
            # be a meaningful part of a pattern or of a replacement.
            line = line.strip('\n\r')
            if separator not in line:
                if not line.strip():
                    # Blank line (e.g. at the end of the file): not a rule.
                    continue
                raise ValueError(
                    f"{fnrExpressions}: line {lineno} has no tab-hyphen-tab "
                    f"separator: {line!r}"
                )
            find, replace = line.split(separator, 1)
            regexList.append([find, replace])
    return regexList
def fnr(string, fnrExpressions):
    """Apply a sequence of regex find/replace rules to a string.

    Args:
        string: the text to transform.
        fnrExpressions: either the path of a rules file (it is then parsed
            with ``getRegex``) or an already-parsed iterable of
            ``(find, replace)`` pairs. Passing parsed pairs lets a caller
            read the rules file once and reuse it for many strings.

    Returns:
        The text after every rule has been applied with ``re.sub``, in
        order; each rule operates on the output of the previous one.
    """
    if isinstance(fnrExpressions, (str, bytes, os.PathLike)):
        pairs = getRegex(fnrExpressions)
    else:
        pairs = fnrExpressions
    for find, replace in pairs:
        string = re.sub(find, replace, string)
    return string
# Walk fnrDir recursively and write a transformed copy of every input file.
for dname, dirs, files in os.walk(fnrDir):
    for fname in files:
        # Names starting with "_" are not treated as inputs; with the default
        # newPrefix this covers the files this script itself writes.
        if fname.startswith('_'):
            continue
        iPath = os.path.join(dname, fname)
        if outDir:
            # Honour the optional output directory, mirroring the layout of
            # fnrDir so same-named files from different sub-directories
            # cannot overwrite each other.
            targetDir = os.path.normpath(
                os.path.join(outDir, os.path.relpath(dname, fnrDir))
            )
            os.makedirs(targetDir, exist_ok=True)
        else:
            # No output directory configured: write next to the input file.
            targetDir = dname
        oPath = os.path.join(targetDir, f"{newPrefix}{fname}")
        with open(iPath, 'r', encoding='utf-8') as f:
            raws = f.read()
        s = fnr(raws, fnrExpressions)
        with open(oPath, "w", encoding='utf-8') as f:
            f.write(s)