-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgeocoding_tweets.py
94 lines (82 loc) · 3.09 KB
/
geocoding_tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
import sys
from geopy.geocoders import Nominatim
import geopy
from flask.json import jsonify
from flask import Flask
# Flask application object; the "/api/tweets" route in this file is registered on it.
app = Flask(__name__)
def shorten_json(json_file):
    """Reduce raw tweet JSON to the subset of fields used by this module.

    Args:
        json_file: JSON *text* (despite the parameter name, not a file path
            or file object) that decodes to a sequence of tweet objects.
            Each tweet must provide ``created_at``, ``id_str``, ``text``,
            ``entities.hashtags``, ``user.name``, ``user.screen_name``,
            ``user.location``, ``user.time_zone``, ``coordinates``,
            ``retweet_count`` and ``favorite_count``.

    Returns:
        A list with one flat dict per tweet, in input order. The nested
        ``user`` and ``entities`` fields are hoisted to the top level.

    Raises:
        ValueError: if ``json_file`` is not valid JSON (raised by
            ``json.loads``).
        KeyError: if a tweet lacks one of the required fields.
    """
    tweets = json.loads(json_file)
    return [
        {
            'created_at': tweet['created_at'],
            'id_str': tweet['id_str'],
            'text': tweet['text'],
            'hashtags': tweet['entities']['hashtags'],
            'name': tweet['user']['name'],
            'screen_name': tweet['user']['screen_name'],
            'location': tweet['user']['location'],
            'time_zone': tweet['user']['time_zone'],
            'coordinates': tweet['coordinates'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
        }
        for tweet in tweets
    ]
def geolocate_tweet(json_file):
    """Geocode tweets and return the ones that received coordinates.

    For each tweet the profile ``location`` is geocoded first. If that
    yields nothing (empty location, no match, or a timeout) the
    ``time_zone`` string is geocoded instead. A tweet whose time zone
    is set but cannot be resolved gets placeholder coordinates.

    Args:
        json_file: iterable of tweet dicts, each with ``location`` and
            ``time_zone`` keys (the shape produced by ``shorten_json``).
            Despite the name this is already-parsed data, not a file.

    Returns:
        A list of the tweet dicts that were given ``address``, ``lng`` and
        ``lat`` keys. The dicts are the same objects as in the input and are
        modified in place. Tweets are omitted from the result when neither
        lookup applies, or when the time-zone lookup times out.

    Raises:
        KeyError: if a tweet lacks ``location`` or ``time_zone``.
    """
    # geopy 2.x raises ConfigurationError when Nominatim is built with the
    # library's default user agent, so an application-specific one is passed.
    geolocator = Nominatim(user_agent="geocoding_tweets")
    result = []

    for item in json_file:
        tweet_location = item['location']
        tweet_timezone = item['time_zone']

        # Truthiness rather than `!= ""`: a missing location that arrives as
        # None would otherwise be handed to the geocoder as a query.
        if tweet_location:
            try:
                location = geolocator.geocode(tweet_location, timeout=1)
            except geopy.exc.GeocoderTimedOut:
                # Best effort: fall through to the time-zone lookup below.
                location = None
            if location is not None:
                item['address'] = location.address
                item['lng'] = location.longitude
                item['lat'] = location.latitude
                result.append(item)
                continue

        if tweet_timezone is not None:
            try:
                location = geolocator.geocode(tweet_timezone, timeout=1)
            except geopy.exc.GeocoderTimedOut:
                # Best effort: a timed-out tweet is left out of the result.
                continue
            if location is not None:
                item['address'] = location.address
                item['lng'] = location.longitude
                item['lat'] = location.latitude
            else:
                # NOTE(review): the placeholder coordinates are strings while
                # successful lookups store the geocoder's own values; confirm
                # that consumers cope with both.
                item['address'] = "Location not defined"
                item['lng'] = "-10.0"
                item['lat'] = "-60.0"
            result.append(item)

    return result
@app.route("/api/tweets")
def get_all_markers(json_file=None):
    """Serve marker data for the ``/api/tweets`` endpoint.

    Args:
        json_file: marker data to hand back. Optional, because the route
            declares no URL variables and Flask therefore calls this view
            without arguments. With the previous required parameter every
            request failed with a TypeError.

    Returns:
        ``json_file`` unchanged when it is supplied (the behaviour direct
        callers already relied on); otherwise a JSON response containing an
        empty array.
    """
    if json_file is None:
        # NOTE(review): nothing visible in this file feeds tweet data to the
        # route, so a plain request can only report "no markers". Wire the
        # real data source in here.
        return jsonify([])
    markers = json_file
    return markers
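# Example usage (shorten_json expects the JSON text itself, not a file path):
# with open("test_tweets.json") as f:
#     short_json = shorten_json(f.read())
# short_json = geolocate_tweet(short_json)
# # pretty printing
# json.dump(short_json, sys.stdout, indent = 2)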