-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmovie.py
62 lines (33 loc) · 1.15 KB
/
movie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import urllib2
import re
# Print the movies that two actors both star in, according to DBpedia.
#
# Each actor's page on live.dbpedia.org is downloaded and scanned for
# 'dbpprop:starring' markers; the title that follows each marker is collected,
# and the titles found on both pages are printed one per line.
#
# The script targets Python 2: it relies on the ``urllib2`` and ``re`` modules
# imported at the top of the file.
#
# NOTE: an earlier revision experimented with the en.wikipedia.org api.php
# "revisions" query as an alternative data source for the second actor.

# DBpedia resource names of the two actors to compare.
nameOf1 = 'Akshay_Kumar'
nameOf2 = 'Sunil_Shetty'

_PAGE_URL_PREFIX = "http://live.dbpedia.org/page/"
# Text that flags a "starring" entry in the page source.
_STARRING_MARKER = 'dbpprop:starring'
# A title is the text between this prefix and the next closing anchor tag.
# NOTE(review): this mirrors the page markup the original script assumed;
# confirm it still matches what live.dbpedia.org serves.
_TITLE_PREFIX = '<small>dbpedia</small>:'
_TITLE_SUFFIX = '</a>'


def fetch_page(name):
    """Return the raw body of the DBpedia page for resource *name*.

    Any error raised by ``urllib2.urlopen`` or ``read`` propagates to the
    caller unchanged.
    """
    response = urllib2.urlopen(_PAGE_URL_PREFIX + name)
    try:
        return response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()


def extract_movies(page):
    """Return the titles that follow each 'dbpprop:starring' marker in *page*.

    For every occurrence of the marker, the title is the text between the
    next '<small>dbpedia</small>:' prefix and the following '</a>'.  Titles
    are returned in page order and may contain duplicates.  Markers that are
    not followed by a complete prefix/suffix pair contribute nothing.
    """
    titles = []
    for match in re.finditer(_STARRING_MARKER, page):
        prefix_at = page.find(_TITLE_PREFIX, match.start())
        if prefix_at == -1:
            # No prefix after this marker means none after any later marker
            # either, so there is nothing left to extract.
            break
        title_start = prefix_at + len(_TITLE_PREFIX)
        title_end = page.find(_TITLE_SUFFIX, title_start)
        if title_end == -1:
            # Unterminated title: slicing with -1 would return garbage.
            break
        titles.append(page[title_start:title_end])
    return titles


def common_movies(movies_a, movies_b):
    """Return the titles present in both lists.

    The result keeps the order of *movies_a* and lists each shared title
    once, even if it appears several times in either input.
    """
    in_b = set(movies_b)
    already_reported = set()
    shared = []
    for title in movies_a:
        if title in in_b and title not in already_reported:
            already_reported.add(title)
            shared.append(title)
    return shared


def main():
    """Fetch both actors' pages and print their shared titles, one per line."""
    movies_1 = extract_movies(fetch_page(nameOf1))
    movies_2 = extract_movies(fetch_page(nameOf2))
    for title in common_movies(movies_1, movies_2):
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print(title)


if __name__ == "__main__":
    main()