-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathaxel_download.py
79 lines (57 loc) · 2.34 KB
/
axel_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from tomorrow import threads
import imghdr
import requests
import os
from itertools import islice
import sys
import logging
import glob
def split(x):
    """Split ``x`` at its first comma into a ``(head, tail)`` tuple.

    Only the first comma separates the two fields, so any further commas
    (for example inside a URL) stay in ``tail``.

    If ``x`` contains no comma the result is ``(x, '')``.  The previous
    ``find``-based slicing returned ``(x[:-1], x)`` in that case because
    ``find`` reports a missing separator as ``-1``.
    """
    head, _, tail = x.partition(',')
    return (head, tail)
def main(args):
    """Download every ``item_id,url`` record listed in ``args.urls``.

    Each record is fetched with ``axel`` run through ``proxychains4``; a
    successful download whose image type ``imghdr`` recognises is moved to
    ``<images_dir>/<item_id>.<type>``.

    Attributes read from ``args``:
        urls        path of a text file with one ``item_id,url`` per line.
        start       number of leading lines of that file to skip.
        images_dir  destination directory (created if it does not exist).
        threads     value handed to the ``threads`` decorator.
        fail_file   optional path; when set, failure records are logged
                    there as ``item_id<TAB>url<TAB>reason``.
        failures    optional path of an ``item_id,url`` retry list that
                    failed records are appended to.  The attribute may be
                    absent from ``args``; it is then treated as ``None``.
    """
    import shutil
    import subprocess
    import threading

    if args.fail_file is not None:
        logging.basicConfig(filename=args.fail_file, format='%(message)s', level=logging.ERROR)

    # The argument parser in this file defines no ``failures`` attribute, so
    # the retry list is optional rather than a hard AttributeError.
    retry_path = getattr(args, 'failures', None)
    # download() is wrapped by ``threads``; serialise appends so records
    # from concurrent calls cannot interleave.
    retry_lock = threading.Lock()

    if args.images_dir and not os.path.isdir(args.images_dir):
        os.makedirs(args.images_dir)

    def record_failure(item_id, url, reason, fails):
        # Log the failure and, when a retry list is configured, append the
        # record to it as one complete line.
        logging.error('%s\t%s\t%s' % (item_id, url, reason))
        if fails is None:
            return
        with retry_lock:
            # Open per write so no handle is left open while other
            # download() calls may still be running.
            with open(fails, 'a') as handle:
                handle.write(item_id + "," + url + "\n")

    # NOTE(review): ``threads`` comes from the third-party ``tomorrow``
    # package; presumably it runs calls on a pool of ``args.threads`` worker
    # threads and returns without waiting -- confirm against its docs.
    @threads(args.threads)
    def download(item_id, url, i, images_dir='', fails=None):
        # Skip items already stored as <item_id>.<type>.  Matching on
        # "<item_id>.*" instead of "<item_id>*" keeps id "12" from being
        # treated as done just because "123.jpg" exists.
        if glob.glob(os.path.join(images_dir, item_id + ".*")):
            return
        try:
            # An argument list (no shell) passes the URL and id verbatim, so
            # quotes, "$" or ";" in the input cannot inject shell commands.
            # ``ret`` is the process exit code, not os.system's wait status.
            ret = subprocess.call([
                "./proxychains-ng/proxychains4", "-q",
                "-f", "./proxychains-ng/src/proxychains.conf",
                "axel", "-q", "-n", "10", "-o", item_id, url,
            ])
            if ret != 0:
                record_failure(item_id, url, 'status:%d' % ret, fails)
            else:
                image_type = imghdr.what(item_id)
                if image_type is None:
                    record_failure(item_id, url, 'unknown_type', fails)
                else:
                    shutil.move(item_id, os.path.join(images_dir, item_id + '.' + image_type))
        except Exception:
            # Best effort: one bad item must not stop the remaining ones.
            # KeyboardInterrupt and SystemExit are not subclasses of
            # Exception, so they still propagate.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            logging.error(sys.exc_info()[0])
        # Coarse progress indicator: the line index of every 200th record.
        if i % 200 == 0:
            print(i)

    with open(args.urls) as url_file:
        for i, line in islice(enumerate(url_file), args.start, None):
            line = line.strip()
            if ',' not in line:
                # Blank or malformed record: there is nothing to download.
                if line:
                    logging.error('%s\t\tmalformed_line' % line)
                continue
            item_id, url = split(line)
            download(item_id, url, i, images_dir=args.images_dir, fails=retry_path)
# Command-line entry point: parse the options, run the downloader, then exit
# with status 0.
if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser()
    # Data handling parameters
    parser.add_argument('--urls', dest='urls', type=str, default=None, required=True, help='urls')
    parser.add_argument('--image_dir', dest='images_dir', type=str, default='images', help='image directory')
    parser.add_argument('--failures', dest='fail_file', type=str, default=None, help='failure records')
    parser.add_argument('--start', dest='start', type=int, default=0, help='start offset')
    parser.add_argument('--threads', dest='threads', type=int, default=10, help='threads')
    args = parser.parse_args()
    main(args)
    # sys.exit rather than the bare ``exit`` helper: ``exit`` is injected by
    # the ``site`` module for interactive use and is not meant for programs.
    sys.exit(0)