-
Notifications
You must be signed in to change notification settings - Fork 109
/
Copy pathreadDic.py
32 lines (29 loc) · 1.18 KB
/
readDic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re
def main(searchText):
    """Find dictionary lines matching ``searchText`` and index them by character.

    Reads ``modern-chinese-dic.txt`` (UTF-8) from the current working
    directory and tests every line against ``searchText``. Each matching line
    that starts with ``*`` followed by a word character (presumably the
    single-character entries of the dictionary) is recorded under that
    character. Every recorded character and its line are printed as they are
    found, and the final mapping is printed once at the end.

    Args:
        searchText: Regular expression searched for anywhere within a line.
            Plain text works as long as it contains no regex metacharacters.

    Returns:
        dict: Maps each matched character to the full text of its line,
        including the trailing newline when the line has one. If the same
        character is found on several lines, the last one wins.

    Raises:
        re.error: If ``searchText`` is not a valid regular expression.
        OSError: If the dictionary file cannot be opened.
    """
    # A leading "*" marks the entries we index; capture the character after it.
    charExp = re.compile(r"(?<=^\*)\w{1}")
    searchExp = re.compile(searchText)
    relatedCharactersDic = {}
    with open('modern-chinese-dic.txt', 'r', encoding='utf-8') as f:
        # Matching line by line (instead of one '.*<text>.*\n' pattern over the
        # whole file) keeps a last line without a newline searchable and stops
        # alternations or groups in searchText from distorting the match.
        for line in f:
            if not searchExp.search(line.rstrip('\n')):
                continue
            matchedCharacter = charExp.search(line)
            if matchedCharacter is None:
                continue
            character = matchedCharacter.group()
            relatedCharactersDic[character] = line
            print(character)
            print(line)
    print(relatedCharactersDic)
    return relatedCharactersDic
if __name__ == '__main__':
    # Script entry point: prompt for the search text and run the lookup.
    query = input('your search txt: ')
    main(query)
# NOTE: r":\S*?。" is a lazy (non-greedy) pattern for stripping all example sentences.