-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjustTry.py
58 lines (47 loc) · 1.92 KB
/
justTry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import readYelp
import words
import businessVectors
import businessFeatureVector as bfv
import userReview
def getWords(i=0):
    """Load the Yelp review dataset and build its bag-of-words data.

    Args:
        i: Forwarded unchanged as the second argument of
            ``readYelp.readY`` (presumably a cap on how many records
            are read -- confirm against ``readYelp``).

    Returns:
        A 2-tuple holding the two values produced by
        ``words.bagOfWords`` for the loaded review records.
    """
    review_path = '../yelp/yelp_academic_dataset_review.json'
    # readY yields a pair; only the second element (the records) is used here.
    _count, reviews = readYelp.readY(review_path, i)
    vocabulary, per_item = words.bagOfWords(reviews)
    return (vocabulary, per_item)
def getBusVector(i=0):
    """Load the Yelp business dataset and build business vectors.

    Args:
        i: Forwarded unchanged as the second argument of
            ``readYelp.readY`` (presumably a cap on how many records
            are read -- confirm against ``readYelp``).

    Returns:
        A 2-tuple holding the two values produced by
        ``businessVectors.busFeatureVector`` for the loaded business
        records.
    """
    business_path = '../yelp/yelp_academic_dataset_business.json'
    # readY yields a pair; only the second element (the records) is used here.
    _count, businesses = readYelp.readY(business_path, i)
    size, bus_vectors = businessVectors.busFeatureVector(businesses)
    return (size, bus_vectors)
def getBusFeatVector(i=0, include_cats = True, only_cats = False, smart = True):
    """Load the Yelp business dataset and wrap each record in ``bfv.Business``.

    Args:
        i: Forwarded unchanged as the second argument of
            ``readYelp.readY`` (presumably a cap on how many records
            are read -- confirm against ``readYelp``).
        include_cats: Passed through to ``bfv.Business`` as ``include_cats``.
        only_cats: Passed through to ``bfv.Business`` as ``only_cats``.
        smart: Passed through to ``bfv.Business`` as ``smart``.

    Returns:
        A list with one ``bfv.Business`` object per loaded business
        record, in the order the records were read.
    """
    business_path = '../yelp/yelp_academic_dataset_business.json'
    # readY yields a pair; only the second element (the records) is used here.
    _count, businesses = readYelp.readY(business_path, i)
    feature_objects = []
    for record in businesses:
        business = bfv.Business(
            record,
            include_cats=include_cats,
            only_cats=only_cats,
            smart=smart,
        )
        feature_objects.append(business)
    return feature_objects
def getUserReviews(i=0):
    """Load the Yelp review dataset and group it with ``reviewsPerUser``.

    Args:
        i: Forwarded unchanged as the second argument of
            ``readYelp.readY`` (presumably a cap on how many records
            are read -- confirm against ``readYelp``).

    Returns:
        Whatever ``userReview.reviewsPerUser`` returns for the loaded
        review records.
    """
    review_path = '../yelp/yelp_academic_dataset_review.json'
    # readY yields a pair; only the second element (the records) is used here.
    _count, reviews = readYelp.readY(review_path, i)
    return userReview.reviewsPerUser(reviews)
def getReviewsTest(i=0, n=10):
    """Load the Yelp review dataset and split it into train and test parts.

    Args:
        i: Forwarded unchanged as the second argument of
            ``readYelp.readY`` (presumably a cap on how many records
            are read -- confirm against ``readYelp``).
        n: Accepted but not used anywhere in this function.
            NOTE(review): possibly meant to be forwarded to
            ``userReview.reviewsPerUserTest`` -- confirm.

    Returns:
        A 2-tuple ``(train, test)`` holding the two values produced by
        ``userReview.reviewsPerUserTest`` for the loaded review records.
    """
    review_path = '../yelp/yelp_academic_dataset_review.json'
    # readY yields a pair; only the second element (the records) is used here.
    _count, reviews = readYelp.readY(review_path, i)
    training, testing = userReview.reviewsPerUserTest(reviews)
    return (training, testing)
def crossUserReviewsBus(minrev=15, maxrev=1000, i=0):
    """Cross user reviews with the known businesses and filter users by volume.

    For every user returned by ``getUserReviews(i)``, reviews whose
    business key is not present in the business vectors returned by
    ``getBusVector(i)`` are removed in place. Users are kept only when
    the number of remaining reviews lies in ``[minrev, maxrev]``.

    Args:
        minrev: Minimum number of remaining reviews (inclusive) a user
            needs to be kept.
        maxrev: Maximum number of remaining reviews (inclusive) a user
            may have to be kept.
        i: Forwarded unchanged to both ``getUserReviews`` and
            ``getBusVector``.

    Returns:
        A 2-tuple ``(output, d_buss)`` where ``output`` maps each kept
        user to that user's pruned review mapping, and ``d_buss`` is the
        second value returned by ``getBusVector``.
    """
    d_user = getUserReviews(i)
    # The first value returned by getBusVector is not needed here.
    _, d_buss = getBusVector(i)
    output = {}
    # .items() instead of .iteritems(): the latter does not exist on
    # Python 3, while .items() behaves correctly on both 2 and 3.
    for user, revs in d_user.items():
        # Collect the keys first: deleting from a mapping while iterating
        # over it is an error.
        unknown = [buss for buss in revs if buss not in d_buss]
        for buss in unknown:
            del revs[buss]
        if minrev <= len(revs) <= maxrev:
            output[user] = revs
    return (output, d_buss)
#n,d = getBusVector(100)
#d = getUserReviews(10)