diff --git a/humblebundle_downloader/cli.py b/humblebundle_downloader/cli.py
index 16f5bc9..dd4ff83 100644
--- a/humblebundle_downloader/cli.py
+++ b/humblebundle_downloader/cli.py
@@ -85,8 +85,8 @@ def cli():
         generate_cookie(cli_args.cookie_file)
 
     elif cli_args.action == 'download':
-        from .download_library import download_library
-        download_library(
+        from .download_library import DownloadLibrary
+        DownloadLibrary(
             cli_args.cookie_file,
             cli_args.library_path,
             progress_bar=cli_args.progress,
@@ -94,4 +94,4 @@ def cli():
             ext_exclude=cli_args.exclude,
             platform_include=cli_args.platform,
             purchase_keys=cli_args.keys,
-        )
+        ).start()
diff --git a/humblebundle_downloader/download_library.py b/humblebundle_downloader/download_library.py
index c261753..3a2ec69 100644
--- a/humblebundle_downloader/download_library.py
+++ b/humblebundle_downloader/download_library.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import json
 import parsel
 import logging
@@ -17,138 +18,178 @@ def _clean_name(dirty_str):
     return "".join(clean).strip()
 
 
-def download_library(cookie_path, library_path, progress_bar=False,
-                     ext_include=None, ext_exclude=None, platform_include=None,
-                     purchase_keys=None):
-    if ext_include is None:
-        ext_include = []
-    ext_include = list(map(str.lower, ext_include))
+class DownloadLibrary:
 
-    if ext_exclude is None:
-        ext_exclude = []
-    ext_exclude = list(map(str.lower, ext_exclude))
+    def __init__(self, cookie_path, library_path, progress_bar=False,
+                 ext_include=None, ext_exclude=None, platform_include=None,
+                 purchase_keys=None):
 
-    if platform_include is None or 'all' in platform_include:
-        # if all then we do not even need to use this feature
-        platform_include = []
-    platform_include = list(map(str.lower, platform_include))
+        with open(cookie_path, 'r') as f:
+            self.account_cookies = f.read()
 
-    # Load cookies
-    with open(cookie_path, 'r') as f:
-        account_cookies = f.read()
+        self.library_path = library_path
+        self.progress_bar = progress_bar
+        self.ext_include = [] if ext_include is None else list(map(str.lower, ext_include))  # noqa: E501
+        self.ext_exclude = [] if ext_exclude is None else list(map(str.lower, ext_exclude))  # noqa: E501
 
-    cache_file = os.path.join(library_path, '.cache.json')
+        if platform_include is None or 'all' in platform_include:
+            # if 'all' is included, the platform filter is not needed
+            platform_include = []
+        self.platform_include = list(map(str.lower, platform_include))
 
-    try:
-        with open(cache_file, 'r') as f:
-            cache_data = json.load(f)
-    except FileNotFoundError:
-        cache_data = {}
+        self.cache_file = os.path.join(library_path, '.cache.json')
+        self.cache_data = self._load_cache_data(self.cache_file)
 
-    if not purchase_keys:
+        self.purchase_keys = purchase_keys if purchase_keys else self._get_purchase_keys()  # noqa: E501
+
+    def start(self):
+        for order_id in self.purchase_keys:
+            self._process_order_id(order_id)
+
+    def _process_order_id(self, order_id):
+        order_url = 'https://www.humblebundle.com/api/v1/order/{order_id}?all_tpkds=true'.format(order_id=order_id)  # noqa: E501
+        order_r = requests.get(order_url,
+                               headers={'cookie': self.account_cookies})
+        logger.debug("Order request: {order_r}".format(order_r=order_r))
+        order = order_r.json()
+        bundle_title = _clean_name(order['product']['human_name'])
+        logger.info("Checking bundle: " + bundle_title)
+        for product in order['subproducts']:
+            self._process_product(order_id, bundle_title, product)
+
+    def _process_product(self, order_id, bundle_title, product):
+        product_title = _clean_name(product['human_name'])
+        # Get
 all download types for a product
+        for download_type in product['downloads']:
+            if not self._should_download_platform(download_type['platform']):
+                logger.info("Skipping {platform} for {product_title}"
+                            .format(platform=download_type['platform'],
+                                    product_title=product_title))
+                continue
+
+            product_folder = os.path.join(
+                self.library_path, bundle_title, product_title
+            )
+
+            # Create directory to save the files to
+            try: os.makedirs(product_folder)  # noqa: E701
+            except OSError: pass  # noqa: E701
+
+            # Download each file type of a product
+            for file_type in download_type['download_struct']:
+                try:
+                    url = file_type['url']['web']
+                except KeyError:
+                    logger.info("No url found: {bundle_title}/{product_title}"
+                                .format(bundle_title=bundle_title,
+                                        product_title=product_title))
+                    continue
+
+                url_filename = url.split('?')[0].split('/')[-1]
+                cache_file_key = order_id + ':' + url_filename
+                ext = url_filename.split('.')[-1]
+                if not self._should_download_file_type(ext):
+                    logger.info("Skipping the file {url_filename}"
+                                .format(url_filename=url_filename))
+                    continue
+
+                local_filename = os.path.join(product_folder, url_filename)
+                product_r = requests.get(url, stream=True)
+                logger.debug("Item request: {product_r}, Url: {url}"
+                             .format(product_r=product_r, url=url))
+                # Not sure which value will be best to use, so use them all
+                file_info = {
+                    'md5': file_type.get('md5'),
+                    'sha1': file_type.get('sha1'),
+                    'url_last_modified': product_r.headers['Last-Modified'],
+                    'url_etag': product_r.headers['ETag'][1:-1],
+                    'url_crc': product_r.headers['X-HW-Cache-CRC'],
+                }
+                if file_info != self.cache_data.get(cache_file_key, {}):
+                    try:
+                        self._download_file(product_r, local_filename)
+
+                    except (Exception, KeyboardInterrupt) as e:
+                        if self.progress_bar: print()  # noqa: E701
+                        logger.error("Failed to download file {product_title}/{url_filename}"  # noqa: E501
+                                     .format(product_title=product_title,
+                                             url_filename=url_filename))
+
+                        # Clean up broken downloaded file
+                        try: os.remove(local_filename)  # noqa: E701
+                        except OSError: pass  # noqa: E701
+
+                        if isinstance(e, KeyboardInterrupt):
+                            sys.exit()
+                        else:
+                            continue
+
+                    else:
+                        if self.progress_bar: print()  # noqa: E701
+                        self._update_cache_data(cache_file_key, file_info)
+
+                    finally:
+                        # It's a streaming connection, so make sure to close it
+                        product_r.connection.close()
+
+    def _update_cache_data(self, cache_file_key, file_info):
+        self.cache_data[cache_file_key] = file_info
+        # Update the cache file with the newest data so if the script
+        # quits it can keep track of the progress
+        # Note: only safe because the script is single threaded;
+        # this needs to change if it is refactored to be multi-threaded
+        with open(self.cache_file, 'w') as outfile:
+            json.dump(
+                self.cache_data, outfile,
+                sort_keys=True, indent=4,
+            )
+
+    def _download_file(self, product_r, local_filename):
+        logger.info("Downloading: {local_filename}"
+                    .format(local_filename=local_filename))
+
+        with open(local_filename, 'wb') as outfile:
+            total_length = product_r.headers.get('content-length')
+            if total_length is None:  # no content length header
+                outfile.write(product_r.content)
+            else:
+                dl = 0
+                total_length = int(total_length)
+                for data in product_r.iter_content(chunk_size=4096):
+                    dl += len(data)
+                    outfile.write(data)
+                    pb_width = 50
+                    done = int(pb_width * dl / total_length)
+                    if self.progress_bar:
+                        print("\t{percent}% [{filler}{space}]"
+                              .format(percent=int(done * (100 / pb_width)),
+                                      filler='=' * done,
+                                      space=' ' * (pb_width - done),
+                                      ), end='\r')
+
+    def
 _load_cache_data(self, cache_file):
+        try:
+            with open(cache_file, 'r') as f:
+                cache_data = json.load(f)
+        except FileNotFoundError:
+            cache_data = {}
+
+        return cache_data
+
+    def _get_purchase_keys(self):
         library_r = requests.get('https://www.humblebundle.com/home/library',
-                                 headers={'cookie': account_cookies})
+                                 headers={'cookie': self.account_cookies})
         logger.debug("Library request: " + str(library_r))
         library_page = parsel.Selector(text=library_r.text)
         orders_json = json.loads(library_page.css('#user-home-json-data')
                                  .xpath('string()').extract_first())
-        purchase_keys = orders_json['gamekeys']
+        return orders_json['gamekeys']
 
-    for order_id in purchase_keys:
-        order_url = 'https://www.humblebundle.com/api/v1/order/{order_id}?all_tpkds=true'.format(order_id=order_id)  # noqa: E501
-        order_r = requests.get(order_url,
-                               headers={'cookie': account_cookies})
-        logger.debug("Order request: " + str(order_r))
-        order = order_r.json()
-        bundle_title = _clean_name(order['product']['human_name'])
-        logger.info("Checking bundle: " + str(bundle_title))
-        for item in order['subproducts']:
-            item_title = _clean_name(item['human_name'])
-            # Get all types of download for a product
-            for download_type in item['downloads']:
-                # Type of product, ebook, videos, etc...
+    def _should_download_platform(self, platform):
+        platform = platform.lower()
+        return not self.platform_include or platform in self.platform_include
 
-                platform = download_type['platform'].lower()
-                if (platform_include and platform not in platform_include):
-                    logger.info("Do not want " + platform
-                                + ", Skipping " + item_title)
-                    continue
-
-                item_folder = os.path.join(
-                    library_path, bundle_title, item_title
-                )
-
-                # Create directory to save the files to
-                try: os.makedirs(item_folder)  # noqa: E701
-                except OSError: pass  # noqa: E701
-
-                # Download each file type of a product
-                for file_type in download_type['download_struct']:
-                    try:
-                        url = file_type['url']['web']
-                    except KeyError:
-                        logger.info("No url found for " + bundle_title
-                                    + "/" + item_title)
-                        continue
-
-                    url_filename = url.split('?')[0].split('/')[-1]
-                    cache_file_key = order_id + ':' + url_filename
-                    ext = url_filename.split('.')[-1]
-                    # Only get the file types we care about
-                    if ((ext_include and ext.lower() not in ext_include)
-                            or (ext_exclude and ext.lower() in ext_exclude)):
-                        logger.info("Skipping the file " + url_filename)
-                        continue
-
-                    filename = os.path.join(item_folder, url_filename)
-                    item_r = requests.get(url, stream=True)
-                    logger.debug("Item request: {item_r}, Url: {url}"
-                                 .format(item_r=item_r, url=url))
-                    # Not sure which value will be best to use, so use them all
-                    file_info = {
-                        'md5': file_type.get('md5'),
-                        'sha1': file_type.get('sha1'),
-                        'url_last_modified': item_r.headers['Last-Modified'],
-                        'url_etag': item_r.headers['ETag'][1:-1],
-                        'url_crc': item_r.headers['X-HW-Cache-CRC'],
-                    }
-                    if file_info != cache_data.get(cache_file_key, {}):
-                        if not progress_bar:
-                            logger.info("Downloading: {item_title}/{url_filename}"  # noqa: E501
-                                        .format(item_title=item_title,
-                                                url_filename=url_filename))
-
-                        with open(filename, 'wb') as outfile:
-                            total_length = item_r.headers.get('content-length')
-                            if total_length is None:  # no content length header
-                                outfile.write(item_r.content)
-                            else:
-                                dl = 0
-                                total_length = int(total_length)
-                                for data in item_r.iter_content(chunk_size=4096):  # noqa E501
-                                    dl += len(data)
-                                    outfile.write(data)
-                                    pb_width = 50
-                                    done = int(pb_width * dl / total_length)
-                                    if progress_bar:
-                                        print("Downloading:
 {item_title}/{url_filename}: {percent}% [{filler}{space}]"  # noqa E501
-                                              .format(item_title=item_title,
-                                                      url_filename=url_filename,
-                                                      percent=int(done * (100 / pb_width)),  # noqa E501
-                                                      filler='=' * done,
-                                                      space=' ' * (pb_width - done),  # noqa E501
-                                                      ), end='\r')
-
-                        if progress_bar:
-                            # print new line so next progress bar
-                            # is on its own line
-                            print()
-
-                        cache_data[cache_file_key] = file_info
-                        # Update cache file with newest data so if the script
-                        # quits it can keep track of the progress
-                        with open(cache_file, 'w') as outfile:
-                            json.dump(
-                                cache_data, outfile,
-                                sort_keys=True, indent=4,
-                            )
+    def _should_download_file_type(self, ext):
+        ext = ext.lower()
+        return not ((self.ext_include and ext not in self.ext_include)
+                    or (self.ext_exclude and ext in self.ext_exclude))
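
For reviewers, here is a minimal sketch of how the refactored entry point is driven after this change; it mirrors what cli.py now does (construct once, then call .start()). The file and folder names are placeholders, not part of this diff:

```python
# Sketch only: drive DownloadLibrary directly instead of through the CLI.
# 'hb-cookies.txt' and 'hb-library' are placeholder paths.
from humblebundle_downloader.download_library import DownloadLibrary

DownloadLibrary(
    'hb-cookies.txt',             # cookie file written by the cookie-generation action
    'hb-library',                 # download root; files land in <bundle>/<product>/
    progress_bar=True,
    ext_include=['pdf', 'epub'],  # only fetch these extensions
    platform_include=['ebook'],   # 'all' (or omitting this) disables the filter
).start()
```

Each file is fetched only when its md5/sha1/header fingerprint differs from the matching entry in `.cache.json`, so re-running against the same library path is cheap.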