-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmake_index.py
37 lines (29 loc) · 962 Bytes
/
make_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def make_index():
    """Build an inverted index from the pages listed in ``crawled.txt``.

    Each non-blank line of ``crawled.txt`` is treated as one URL.  The text
    of that page is read from a file in the current directory whose name is
    derived from the URL: the first 7 characters are dropped, every ``/`` is
    replaced by ``-`` and ``.txt`` is appended.  The page text is split on
    whitespace and every resulting word is recorded against the URL.

    Returns:
        A dict mapping each word to the list of URLs whose page file
        contains it.

    Raises:
        IOError/OSError: if ``crawled.txt`` or any derived page file cannot
        be opened.
    """
    index = {}
    # `with` guarantees both the listing and every page file are closed,
    # even if reading one of them fails part-way through.
    with open("crawled.txt", "r") as f_crawled:
        for line in f_crawled:
            url = line.replace('\n', '')
            if not url:
                # Blank line: no URL, so there is no page file to read.
                continue
            # NOTE(review): the 7-character strip presumably removes
            # "http://"; it must stay in sync with whatever code writes
            # the page files -- confirm before changing.
            filename = url[7:].replace('/', '-') + '.txt'
            with open(filename, "r") as f_file:
                content = f_file.read()
            for word in content.split():
                add_to_index(index, word, url)
    return index
def add_to_index(index, keyword, url):
    """Record that ``url`` contains ``keyword``, mutating ``index`` in place.

    ``index`` maps keywords to lists of URLs.  A keyword seen for the first
    time gets a new one-element list; a URL already listed for the keyword
    is not added again.  Returns ``None``.
    """
    urls = index.setdefault(keyword, [])
    if url not in urls:
        urls.append(url)
def lookup(index, keyword):
    """Return the list of URLs stored for ``keyword``, or ``None`` if absent.

    The list returned is the one held inside ``index``, not a copy.
    """
    return index.get(keyword)
# Build the index at module level and dump it to stdout.
index = make_index()
# Parenthesised single-argument form prints identically on Python 2 and
# is valid syntax on Python 3, unlike the bare `print index` statement.
print(index)