-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathschool_search.py
94 lines (63 loc) · 2.26 KB
/
school_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import time
import utils
# Separator used both to split text into words and to re-join words into
# multi-word phrases when tokenizing.
TEXT_SEPARATOR = " "
class Index:
    """Search record pairing a display string with the tokens it matches on."""

    def __init__(self, display, token):
        # display: text printed for this entry in the search results.
        # token: collection that query tokens are checked against with `in`.
        self.display, self.token = display, token
def _tokenize(text: str):
    """Expand ``text`` into the list of phrases it can be matched by.

    The result always starts with ``text`` itself. When ``text`` contains
    more than one ``TEXT_SEPARATOR``-delimited word, the individual words
    follow, then every contiguous run of 2 .. (word count - 1) words, shortest
    runs first and left to right within each length. Duplicates are kept.

    Example: ``"A B C"`` -> ``["A B C", "A", "B", "C", "A B", "B C"]``.
    """
    pieces = text.split(TEXT_SEPARATOR)
    piece_count = len(pieces)

    # A single word is already covered by the full text entry.
    if piece_count == 1:
        return [text]

    tokens = [text, *pieces]
    # Runs of every intermediate length; the full-length run is `text` itself,
    # so the range stops one short of the word count.
    for size in range(2, piece_count):
        tokens.extend(
            TEXT_SEPARATOR.join(pieces[start:start + size])
            for start in range(piece_count - size + 1)
        )
    return tokens
def _make_indices(file_path) -> list:
    """Build one ``Index`` per row returned by ``utils.csv_to_lists``.

    Each entry's display text is the upper-cased school name followed, on a
    new line, by the upper-cased city and state. Its tokens are the
    concatenated ``_tokenize`` output for the school name, state and city.

    Args:
        file_path: Path handed unchanged to ``utils.csv_to_lists``.

    Returns:
        A list of ``Index`` objects, in row order.

    Raises:
        ValueError: From ``columns.index`` when a required column name is
            missing (assuming ``columns`` is a list or tuple of header names).
    """
    columns, rows = utils.csv_to_lists(file_path)

    # Column positions are identical for every row, so resolve them once
    # instead of scanning the header three times per row.
    name_col = columns.index(utils.SCHOOL_NAME_C)
    state_col = columns.index(utils.STATE_C)
    city_col = columns.index(utils.LCITY_C)

    reverted_indices = []
    for row in rows:
        school_name = row[name_col].upper()
        state = row[state_col].upper()
        city = row[city_col].upper()
        reverted_indices.append(Index(
            display=f'{school_name}\n {city}, {state}',
            token=_tokenize(school_name) + _tokenize(state) + _tokenize(city),
        ))
    return reverted_indices
def _pre_processing():
    """Build the search index from the school data CSV in the working directory."""
    data_file = './school_data.csv'
    return _make_indices(data_file)
# Built once when the module is loaded; search_schools iterates over this
# module-level list on every query.
indices = _pre_processing()
def search_schools(query: str):
    """Print the three best-matching schools for ``query``.

    The query is upper-cased and expanded with ``_tokenize``. Every entry of
    the module-level ``indices`` is scored by the fraction of query tokens
    found in its ``token`` collection. The three highest-scoring entries are
    printed in descending score order; entries with a zero score are omitted.

    Args:
        query: Free-text search string.

    Returns:
        None. Results are written to standard output.
    """
    # perf_counter is monotonic, so the elapsed time cannot go negative or
    # jump if the system clock is adjusted mid-search.
    start = time.perf_counter()

    # The query tokens do not depend on the document: build them once rather
    # than once per indexed entry.
    search_terms = _tokenize(query.upper())
    term_count = len(search_terms)

    scores = [
        {
            'index': doc.display,
            'score': sum(1 for term in search_terms if term in doc.token) / term_count,
        }
        for doc in indices
    ]
    # list.sort is stable, so equally scored entries keep their index order.
    scores.sort(key=lambda s: s['score'], reverse=True)
    top_matches = scores[:3]
    elapsed = time.perf_counter() - start

    print(f'Results for "{query}" (search took: {elapsed}s)')

    # Covers both an empty index and an index in which nothing matched; scores
    # are sorted descending, so a zero best score means every score is zero.
    if not top_matches or top_matches[0]['score'] == 0:
        print('There is no data match. :(')
        return

    for rank, item in enumerate(top_matches, start=1):
        if item['score'] != 0:
            print(f'{rank}.{item["index"]}')
# Example query. This is a top-level statement, so it runs (and prints) every
# time the module is executed or imported.
# NOTE(review): consider moving it under `if __name__ == "__main__":` if this
# module is meant to be imported by other code.
search_schools("elementary school highland park")