generated from OpenPecha/new-repo-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathold_db_parser.py
53 lines (42 loc) · 1.45 KB
/
old_db_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from utils import csv_to_df, get_new_df
def get_sense(explanations, notes):
    """Return the sense mapping for a word.

    Currently a placeholder: both ``explanations`` and ``notes`` are
    accepted but not read, and a new empty dict is returned on every call.
    """
    # TODO(review): sense extraction is not implemented yet.
    return {}
def is_key_word(key_word) -> bool:
    """Return True when ``key_word`` contains the marker "རྒྱུན་སྤྱོད"."""
    marker = "རྒྱུན་སྤྱོད"
    return marker in key_word
def is_new_word(new_word) -> bool:
    """Return True when ``new_word`` contains the marker "མ་ཚིག་གསར་པ"."""
    marker = "མ་ཚིག་གསར་པ"
    return marker in new_word
def get_register(use, archaic):
    """Pick the register value from the ``use`` and ``archaic`` fields.

    Returns ``archaic`` when ``use`` is an empty string and ``archaic`` is
    truthy, otherwise ``use`` when it is truthy, otherwise an empty string.
    """
    if use == "" and archaic:
        register = archaic
    elif use:
        register = use
    else:
        register = ""
    return register
def get_new_word(row):
    """Build the new-database record for one row of the old database.

    Args:
        row: Object indexable by old column name (``convert_to_new_db``
            passes the rows yielded by ``df.iterrows()``).

    Returns:
        dict: The new word record. ``sense``, ``is_key_word``,
        ``is_new_word`` and ``register`` are derived through the helper
        functions; the remaining fields are copied from ``row`` unchanged.
    """
    sense = get_sense(row["explanation"], row["note"])
    new_word = {
        "word_id": row["wordindexid"],
        "lemma": row["word"],
        "origin": row["origin"],
        "sense": sense,
        "is_key_word": is_key_word(row["key_word"]),
        "is_new_word": is_new_word(row["newword"]),
        "register": get_register(row["use"], row["archaic"]),
        "editor": row["editor"],
        "editor_group": row["editor_group"],
        "datetime": row["datetime"],
        "monlamitemploye": row["monlamitemploye"],
    }
    # The record was previously built and then dropped, so callers always
    # received None; hand it back so it can actually be saved.
    return new_word
def convert_to_new_db(df):
    """Convert every row of ``df`` and hand the result to ``save_new_word``.

    Rows are visited in ``df.iterrows()`` order; each one is passed through
    ``get_new_word`` and the result is given to ``save_new_word``.
    """
    # NOTE(review): save_new_word is neither defined nor imported in the
    # visible part of this file -- confirm where it is supposed to come from.
    for _, record_row in df.iterrows():
        save_new_word(get_new_word(record_row))
def parse_old_db(file_path):
    """Load the old database CSV at ``file_path`` and convert its rows.

    The file is read with ``csv_to_df``, passed through ``get_new_df``
    together with the list of old column names, and the resulting frame is
    handed to ``convert_to_new_db``.
    """
    old_columns = [
        "wordindexid",
        "word",
        "explanation",
        "note",
        "key_word",
        "origin",
        "image",
        "archaic",
        "newword",
        "use",
        "noun_tayp",
        "editor",
        "editor_group",
        "datetime",
        "monlamitemploye",
    ]
    raw_df = csv_to_df(file_path)
    convert_to_new_db(get_new_df(raw_df, old_columns))