-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathuberdownload.py
105 lines (85 loc) · 3.82 KB
/
uberdownload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import requests
import time
import json
import sys
import os
import os.path
from loguru import logger
def get_page_with_wait(url, wait=6, max_retries=1, current_retry_count=0):  # SGF throttling is 10/minute
    """Fetch `url` after sleeping `wait` seconds, respecting OGS rate limits.

    Returns the parsed JSON (for JSON responses), the raw SGF bytes (for
    SGF responses), or None when the network is down and retries are
    exhausted. Raises Exception on a 403 or on an unrecognized content type.

    BUG FIX: the original caught `HTTPError`/`URLError`, which were never
    imported (NameError), and which `requests.get` does not raise for HTTP
    error statuses anyway — status codes must be read from the response.
    """
    if wait < 0.01:
        wait = 0.01
    try:
        time.sleep(wait)
        response = requests.get(url)
    except requests.exceptions.RequestException:
        # Sometimes DNS or the network temporarily falls over, and will
        # come back if we try again.
        if current_retry_count < max_retries:
            # Wait 5 seconds between retries; preserve the retry budget.
            return get_page_with_wait(url, 5, max_retries=max_retries,
                                      current_retry_count=current_retry_count + 1)
        print("Can't fetch '{}'. Check your network connection.".format(url))
        return None  # allow script to continue (caller checks for falsy)
    if response.status_code == 429:  # too many requests
        print("Too many requests / minute, falling back to {} seconds between fetches.".format(int(1.5 * wait)))
        # Exponential falloff.
        return get_page_with_wait(url, wait=(1.5 * wait), max_retries=max_retries,
                                  current_retry_count=current_retry_count)
    if response.status_code == 403:
        raise Exception("forbidden to access URL.")
    logger.debug(f"Response is {response}. headers = {response.headers}")
    content_type = response.headers.get('content-type', '')
    if content_type == 'application/json':
        return response.json()
    if content_type == 'application/x-go-sgf; charset=utf-8':
        # Return raw bytes so the SGF can be written to disk unmodified.
        return response.content
    raise Exception(f"Response headers {content_type} not recognized.")
def results(url):
    """Yield each item from every page of a paginated OGS API endpoint.

    Follows the "next" link in each JSON page until it is None.
    """
    next_url = url
    while next_url is not None:
        page = get_page_with_wait(next_url, 0)
        yield from page["results"]
        next_url = page["next"]
def user_games(user_id):
    """Yield the ids of `user_id`'s finished games, newest first.

    BUG FIX: the URL template was "players{}" — missing the "/" after
    "players" — so it produced e.g. ".../players123/games" instead of
    ".../players/123/games" (compare the commented-out variant below).
    """
    url = "https://online-go.com/api/v1/players/{}/games?ended__isnull=0&ordering=-ended&page_size=5&format=json".format(user_id)
    # url = "https://online-go.com/api/v1/players/{}/games/?format=json".format(user_id)
    for game in results(url):
        yield game["id"]
def user_reviews(user_id):
    """Yield (review_id, game_id) pairs for reviews owned by `user_id`.

    NOTE(review): deliberately disabled by the early return below — the
    generator currently yields nothing. The unreachable code is kept so
    the feature can be re-enabled by deleting the return.
    """
    return
    url = "https://online-go.com/api/v1/reviews/?owner__id={}&format=json".format(user_id)
    for review in results(url):
        yield review["id"], review["game"]["id"]
def reviews_for_game(game_id):
    """Yield the ids of reviews attached to `game_id`.

    NOTE(review): deliberately disabled by the early return below — the
    generator currently yields nothing. The unreachable code is kept so
    the feature can be re-enabled by deleting the return.
    """
    return
    url = "https://online-go.com/api/v1/games/{}/reviews?format=json".format(game_id)
    for review in results(url):
        yield review["id"]
def save_sgf(out_filename, SGF_URL, name):
    """Download the SGF at SGF_URL into out_filename, skipping existing files.

    `name` is a human-readable label used only in progress messages.
    A falsy fetch result (failed download) is reported and skipped.
    """
    if os.path.exists(out_filename):
        print("Skipping {} because it has already been downloaded.".format(name))
        return
    print("Downloading {}...".format(name))
    sgf = get_page_with_wait(SGF_URL)
    if not sgf:
        print("Skipping {} because it encountered an error.".format(name))
        return
    with open(out_filename, "wb") as f:
        f.write(sgf)
if __name__ == "__main__":
    # Usage: uberdownload.py <user_id> <dest_dir>
    user_id = int(sys.argv[1])
    dest_dir = sys.argv[2]
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)
    # Use the parsed user_id rather than re-reading sys.argv[1].
    for g in user_games(user_id):
        save_sgf(os.path.join(dest_dir, "OGS_game_{}.sgf".format(g)),
                 "https://online-go.com/api/v1/games/{}/sgf".format(g),
                 "game {}".format(g))
        for r in reviews_for_game(g):
            # BUG FIX: the review SGF URL was formatted with the game id
            # (g) instead of the review id (r), fetching the wrong resource.
            save_sgf(os.path.join(dest_dir, "OGS_game_{}_review_{}.sgf".format(g, r)),
                     "https://online-go.com/api/v1/reviews/{}/sgf".format(r),
                     "review {} of game {}".format(r, g))
    for r, g in user_reviews(user_id):
        # BUG FIX: same review-id/game-id mixup as above.
        save_sgf(os.path.join(dest_dir, "OGS_game_{}_review_{}.sgf".format(g, r)),
                 "https://online-go.com/api/v1/reviews/{}/sgf".format(r),
                 "review {} of game {}".format(r, g))