Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Primarily updates to add parameters which improve performance. #68

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
__pycache__
*cookies.txt
.tox/
build/
dist/
70 changes: 68 additions & 2 deletions humblebundle_downloader/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import logging
import argparse
from humblebundle_downloader._version import __version__

logger = logging.getLogger(__name__)

Expand All @@ -13,13 +14,47 @@
# Ignore unwanted logs from the requests lib when debugging
logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)

def parse_size(size):
    """Convert a human-readable size such as "10K" or "1.5MB" to bytes.

    Units are binary multiples (K = 1024, M = 1024**2, G = 1024**3,
    T = 1024**4) and are matched case-insensitively. A trailing "B" is
    optional, so "10KB" and "10K" are equivalent.

    :param size: an int, a plain integer string, a number followed by a
        unit letter, or None when no value was supplied.
    :return: the size as an int number of bytes, or None if size is None.
    :raises ValueError: if the unit is unknown or the number is malformed.
    """
    # An option that was not supplied arrives as None; int(None) raises
    # TypeError (not ValueError), so pass "unset" through instead of crashing.
    if size is None:
        return None

    units = {"K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    try:
        return int(size)  # already an int, or a plain integer string
    except ValueError:
        pass  # not a bare integer: fall through and look for a unit suffix

    text = size.strip().upper()
    if text.endswith('B'):
        # Drop the optional byte suffix and re-parse ("10B" -> "10")
        return parse_size(text[:-1])

    unit = text[-1:]
    number = text[:-1]
    if unit not in units:
        raise ValueError(f'Invalid Unit: {unit}')
    return int(float(number) * units[unit])

def parse_seconds(size):
    """Convert a human-readable duration such as "90", "2m" or "1.5h" to seconds.

    Recognised unit suffixes (case-insensitive): S = seconds, M = minutes,
    H = hours, D = days, W = weeks.

    :param size: an int, a plain integer string, a number followed by a
        unit letter, or None when no value was supplied.
    :return: the duration as an int number of seconds, or None if size
        is None.
    :raises ValueError: if the unit is unknown or the number is malformed.
    """
    # An option that was not supplied arrives as None; int(None) raises
    # TypeError (not ValueError), so pass "unset" through instead of crashing.
    if size is None:
        return None

    units = {"S": 1, "M": 60, "H": 60*60, "D": 60*60*24, 'W': 60*60*24*7}
    try:
        return int(size)  # already an int, or a plain integer string
    except ValueError:
        pass  # not a bare integer: fall through and look for a unit suffix

    text = size.strip().upper()
    unit = text[-1:]
    number = text[:-1]
    if unit not in units:
        raise ValueError(f'Invalid Unit: {unit}')
    return int(float(number) * units[unit])
Comment on lines +17 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://pypi.org/project/humanfriendly/ is a library option for this functionality


def parse_args(args):
if args[0].lower() == 'download':
if ((len(args)>0) and (args[0].lower() == 'download')):
args = args[1:]
raise DeprecationWarning("`download` argument is no longer used")

parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(description='Download purchases from Humble Bundle',
epilog=f'Version: {__version__}')

cookie = parser.add_mutually_exclusive_group(required=True)
cookie.add_argument(
Expand Down Expand Up @@ -73,6 +108,32 @@ def parse_args(args):
help=("The purchase download key. Find in the url on the "
"products/bundle download page. Can set multiple"),
)
parser.add_argument(
'-b', '--write-buffer',
type=str, default=1024*1024,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this the only argument with a default set here while all the others are set in the download_library.py init?

help="Size of file buffer to use"
)
parser.add_argument(
'--chunk_size','--chunk',
type=str,
help='Download Chunk Size'
)
parser.add_argument(
'--timeout',
type=str,
help='Timeout (in seconds) for get requests'
)
parser.add_argument(
'--debug',
action='store_true',
help='Run in Debug Mode. Stops on Exceptions'
)
parser.add_argument(
'--keep',
action='store_true',
help='Keep files that fail download (ie: do not delete on failure)'
)


return parser.parse_args(args)

Expand All @@ -92,4 +153,9 @@ def cli():
purchase_keys=cli_args.keys,
trove=cli_args.trove,
update=cli_args.update,
write_buffer=parse_size(cli_args.write_buffer),
chunk_size=parse_size(cli_args.chunk_size),
timeout=parse_seconds(cli_args.timeout),
debug=cli_args.debug,
keep=cli_args.keep,
).start()
75 changes: 64 additions & 11 deletions humblebundle_downloader/download_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,23 @@ class DownloadLibrary:

def __init__(self, library_path, cookie_path=None, cookie_auth=None,
progress_bar=False, ext_include=None, ext_exclude=None,
write_buffer=1, chunk_size=4096, timeout=0,
platform_include=None, purchase_keys=None, trove=False,
keep=False, debug=False,
update=False):
self.library_path = library_path
self.progress_bar = progress_bar
self.ext_include = [] if ext_include is None else list(map(str.lower, ext_include)) # noqa: E501
self.ext_exclude = [] if ext_exclude is None else list(map(str.lower, ext_exclude)) # noqa: E501
self.write_buffer = write_buffer
self.chunk_size = chunk_size
self.timeout=timeout
self.debug=debug # debug flag will raise exceptions so the program can stop for debugging purposes
self.keep=keep # keep files on exception? Default is to os.remove file on exception

# these variables are used for the enhanced progress bar
self.total_downloaded = 0
self.start_time = datetime.datetime.now()

if platform_include is None or 'all' in platform_include:
# if 'all', then do not need to use this check
Expand Down Expand Up @@ -146,7 +157,7 @@ def _process_trove_product(self, title, product):
continue

try:
product_r = self.session.get(signed_url, stream=True)
product_r = self.session.get(signed_url, stream=True, timeout=self.timeout)
except Exception:
logger.error("Failed to get trove product {title}"
.format(title=web_name))
Expand Down Expand Up @@ -177,7 +188,7 @@ def _get_trove_products(self):
.format(idx=idx))
trove_page_url = trove_base_url.format(idx=idx)
try:
trove_r = self.session.get(trove_page_url)
trove_r = self.session.get(trove_page_url, timeout=self.timeout)
except Exception:
logger.error("Failed to get products from Humble Trove")
return []
Expand All @@ -201,6 +212,7 @@ def _process_order_id(self, order_id):
'content-type': 'application/json',
'content-encoding': 'gzip',
},
timeout=self.timeout,
)
except Exception:
logger.error("Failed to get order key {order_id}"
Expand Down Expand Up @@ -269,7 +281,7 @@ def _process_product(self, order_id, bundle_title, product):
continue

try:
product_r = self.session.get(url, stream=True)
product_r = self.session.get(url, stream=True, timeout=self.timeout)
except Exception:
logger.error("Failed to download {url}".format(url=url))
continue
Expand Down Expand Up @@ -331,9 +343,13 @@ def _process_download(self, open_r, cache_file_key, file_info,
logger.error("Failed to download file {local_filename}"
.format(local_filename=local_filename))

# Clean up broken downloaded file
try: os.remove(local_filename) # noqa: E701
except OSError: pass # noqa: E701
if self.debug:
raise

if not self.keep:
# Clean up broken downloaded file
try: os.remove(local_filename) # noqa: E701
except OSError: pass # noqa: E701

if type(e).__name__ == 'KeyboardInterrupt':
sys.exit()
Expand All @@ -348,31 +364,68 @@ def _process_download(self, open_r, cache_file_key, file_info,
# Since its a stream connection, make sure to close it
open_r.connection.close()

"""
This function will return a human-readable filesize-string
like "3.5 MB" for its given 'num' parameter.
From http://stackoverflow.com/questions/1094841
"""
def _convert_size(self, num):
for units in ['B','KB','MB','GB','TB', 'PB', 'EB', 'ZB', 'YB']:
if num < 1024.0:
return "%6.2f %s" % (num, units)
num /= 1024.0
return "%6.2f %s" % (num, units)

def _download_file(self, product_r, local_filename):
    """Write the body of the response product_r to local_filename.

    When the response has a content-length header the body is streamed in
    chunks of self.chunk_size and, if self.progress_bar is set, a progress
    line with the current transfer rate is printed after every chunk,
    followed by a summary line once the file is complete. Without that
    header the whole body is written in one go and nothing is printed.

    self.total_downloaded is increased by the number of bytes written.

    :param product_r: response object providing headers, content and
        iter_content() (presumably a streamed requests response - confirm
        against the callers).
    :param local_filename: path of the file to create or overwrite.
    :raises ValueError: if fewer or more bytes arrive than content-length
        announced.
    """
    logger.info("Downloading: {local_filename}"
                .format(local_filename=local_filename))

    # progress bar width
    pb_width = 40

    # format strings for the progress bar
    FILE_PROGRESS_FORMAT = '\t{fspeed}/s\t{percent:3}% [{filler}{space}]'
    FILE_FINISH_FORMAT = (FILE_PROGRESS_FORMAT
                          + ' {tspeed}/s {downloaded:,}b/{seconds:.3f}s')

    def speed(byte_count, seconds):
        # Two clock reads can return the same instant, which would make
        # the rate a division by zero; report 0 B/s in that case.
        per_second = int(byte_count / seconds) if seconds > 0 else 0
        return self._convert_size(per_second)

    # download start time
    dl_start = datetime.datetime.now()

    with open(local_filename, 'wb', buffering=self.write_buffer) as outfile:
        total_length = product_r.headers.get('content-length')
        if total_length is None:  # no content length header
            body = product_r.content
            outfile.write(body)
            self.total_downloaded += len(body)
        else:
            # bytes downloaded
            dl = 0
            total_length = int(total_length)
            for data in product_r.iter_content(chunk_size=self.chunk_size):
                dl += len(data)
                outfile.write(data)
                # A zero content-length with data still arriving would
                # divide by zero; draw a full bar and let the length
                # check below report the mismatch.
                if total_length:
                    done = int(pb_width * dl / total_length)
                else:
                    done = pb_width
                if self.progress_bar:
                    elapsed = (datetime.datetime.now()
                               - dl_start).total_seconds()
                    print(FILE_PROGRESS_FORMAT
                          .format(percent=int(done * (100 / pb_width)),
                                  filler='=' * done,
                                  space=' ' * (pb_width - done),
                                  fspeed=speed(dl, elapsed),
                                  ), end='\r')

            if dl != total_length:
                raise ValueError("Download did not complete")
            self.total_downloaded += dl
            if self.progress_bar:
                now = datetime.datetime.now()
                elapsed = (now - dl_start).total_seconds()
                run_seconds = (now - self.start_time).total_seconds()
                # The download is complete here, so the bar is full even
                # when the body arrived in zero chunks (empty file).
                done = pb_width
                print(FILE_FINISH_FORMAT
                      .format(tspeed=speed(self.total_downloaded,
                                           run_seconds),
                              fspeed=speed(dl, elapsed),
                              downloaded=self.total_downloaded,
                              seconds=run_seconds,
                              percent=int(done * (100 / pb_width)),
                              filler='=' * done,
                              space=' ' * (pb_width - done),
                              ), end='\r')

def _load_cache_data(self, cache_file):
try:
Expand All @@ -385,7 +438,7 @@ def _load_cache_data(self, cache_file):

def _get_purchase_keys(self):
try:
library_r = self.session.get('https://www.humblebundle.com/home/library') # noqa: E501
library_r = self.session.get('https://www.humblebundle.com/home/library', timeout=self.timeout) # noqa: E501
except Exception:
logger.exception("Failed to get list of purchases")
return []
Expand Down