diff --git a/CHANGELOG.md b/CHANGELOG.md index db7535b..7a39570 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,16 @@ # Change log +### 0.2.0 +- Added **Humble Trove** support _(`--trove` to also check/download trove content)_ +- Now by default only new content is downloaded. Use `--update` to also check for updated content + + ### 0.1.3 - Fixed re-downloading for real this time - Only use the url last modified time as the check for new versions - + ### 0.1.2 - Stop using md5 & sha1 hashes to check if file is unique (was creating duplicate downloads of the same file) - Strip periods from end of directory & file names diff --git a/README.md b/README.md index b78dd8d..48cd82c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ The first time this runs it may take a while because it will download everything. After that it will only download the content that has been updated or is missing. ## Features -- downloads new and updated content from your Humble Bundle Library on each run +- support for Humble Trove _(`--trove` flag)_ +- downloads new and updated content from your Humble Bundle Library on each run _(only check for updates if using `--update`)_ - cli command for easy use (downloading will also work on a headless system) - works for SSO and 2FA accounts - optional progress bar for each item downloaded _(`--progress` flag)_ diff --git a/humblebundle_downloader/_version.py b/humblebundle_downloader/_version.py index 8ce9b36..7fd229a 100644 --- a/humblebundle_downloader/_version.py +++ b/humblebundle_downloader/_version.py @@ -1 +1 @@ -__version__ = '0.1.3' +__version__ = '0.2.0' diff --git a/humblebundle_downloader/cli.py b/humblebundle_downloader/cli.py index dd4ff83..66585ae 100644 --- a/humblebundle_downloader/cli.py +++ b/humblebundle_downloader/cli.py @@ -9,6 +9,8 @@ logging.basicConfig( level=LOG_LEVEL, format='%(message)s', ) +# Ignore unwanted logs from the requests lib when debuging +logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING) def cli(): @@ -32,8 +34,6 @@ def cli(): ### # Download Library ### - # TODO: have option to only get types, ebooks, videos, etc do not enforce, - # but lower and just string match to the type in the api parser_download = subparsers.add_parser( 'download', help="Download content in your humble bundle library", @@ -48,6 +48,15 @@ def cli(): help="Folder to download all content to", required=True, ) + parser_download.add_argument( + '-t', '--trove', action='store_true', + help="Only check and download Humble Trove content", + ) + parser_download.add_argument( + '-u', '--update', action='store_true', + help=("Check to see if products have been updated " + "(still get new products)"), + ) parser_download.add_argument( '-p', '--platform', type=str, nargs='*', @@ -94,4 +103,6 @@ def cli(): ext_exclude=cli_args.exclude, platform_include=cli_args.platform, purchase_keys=cli_args.keys, + trove=cli_args.trove, + update=cli_args.update, ).start() diff --git a/humblebundle_downloader/download_library.py b/humblebundle_downloader/download_library.py index b80c156..5cb5861 100644 --- a/humblebundle_downloader/download_library.py +++ b/humblebundle_downloader/download_library.py @@ -1,6 +1,7 @@ import os import sys import json +import time import parsel import logging import datetime @@ -23,10 +24,10 @@ class DownloadLibrary: def __init__(self, cookie_path, library_path, progress_bar=False, ext_include=None, ext_exclude=None, platform_include=None, - purchase_keys=None): + purchase_keys=None, trove=False, update=False): with open(cookie_path, 'r') as f: - self.account_cookies = f.read() + self.account_cookies = f.read().strip() self.library_path = library_path self.progress_bar = progress_bar @@ -43,14 +44,116 @@ class DownloadLibrary: self.purchase_keys = purchase_keys if purchase_keys else self._get_purchase_keys() # noqa: E501 + if trove is True: + logger.info("Checking Humble Trove...") + self.trove_products = self._get_trove_products() + else: + self.trove_products = [] + + self.update = update + def start(self): + for product in self.trove_products: + title = _clean_name(product['human-name']) + self._process_trove_product(title, product) + + # Always check your purchases for order_id in self.purchase_keys: self._process_order_id(order_id) + def _get_trove_download_url(self, machine_name, web_name): + sign_r = requests.post( + 'https://www.humblebundle.com/api/v1/user/download/sign', + data={ + 'machine_name': machine_name, + 'filename': web_name, + }, + headers={'cookie': self.account_cookies}, + ) + logger.debug("Signed url response {sign_r}".format(sign_r=sign_r)) + signed_url = sign_r.json()['signed_url'] + logger.debug("Signed url {signed_url}".format(signed_url=signed_url)) + return signed_url + + def _process_trove_product(self, title, product): + for download in product['downloads'].values(): + # Sometimes the name has a dir in it + # Example is "Broken Sword 5 - the Serpent's Curse" + # Only the windows file has a dir like + # "revolutionsoftware/BS5_v2.2.1-win32.zip" + web_name = download['url']['web'].split('/')[-1] + cache_file_key = 'trove:{name}'.format(name=web_name) + file_info = { + 'uploaded_at': download.get('uploaded_at'), + 'md5': download.get('md5'), + } + cache_file_info = self.cache_data.get(cache_file_key, {}) + + if cache_file_info != {} and self.update is not True: + # Do not care about checking for updates at this time + continue + + if (file_info['uploaded_at'] != cache_file_info.get('uploaded_at') + and file_info['md5'] != cache_file_info.get('md5')): + product_folder = os.path.join( + self.library_path, 'Humble Trove', title + ) + # Create directory to save the files to + try: os.makedirs(product_folder) # noqa: E701 + except OSError: pass # noqa: E701 + local_filename = os.path.join( + product_folder, + web_name, + ) + signed_url = self._get_trove_download_url( + download['machine_name'], + web_name, + ) + product_r = requests.get(signed_url, stream=True) + + if 'uploaded_at' in cache_file_info: + uploaded_at = time.strftime( + '%Y-%m-%d', + time.localtime(int(cache_file_info['uploaded_at'])) + ) + else: + uploaded_at = None + + self._process_download( + product_r, + cache_file_key, + file_info, + local_filename, + rename_str=uploaded_at, + ) + + def _get_trove_products(self): + trove_products = [] + idx = 0 + trove_base_url = 'https://www.humblebundle.com/api/v1/trove/chunk?index={idx}' # noqa: E501 + while True: + logger.debug("Collecting trove product data from api pg:{idx} ..." + .format(idx=idx)) + trove_page_url = trove_base_url.format(idx=idx) + trove_r = requests.get(trove_page_url, + headers={'cookie': self.account_cookies}) + page_content = trove_r.json() + + if len(page_content) == 0: + break + + trove_products.extend(page_content) + idx += 1 + + return trove_products + def _process_order_id(self, order_id): order_url = 'https://www.humblebundle.com/api/v1/order/{order_id}?all_tpkds=true'.format(order_id=order_id) # noqa: E501 order_r = requests.get(order_url, - headers={'cookie': self.account_cookies}) + headers={'cookie': self.account_cookies, + 'content-type': 'application/json', + 'content-encoding': 'gzip', + }) logger.debug("Order request: {order_r}".format(order_r=order_r)) order = order_r.json() bundle_title = _clean_name(order['product']['human_name']) @@ -58,6 +161,18 @@ class DownloadLibrary: for product in order['subproducts']: self._process_product(order_id, bundle_title, product) + def _rename_old_file(self, local_filename, append_str): + # Check if older file exists, if so rename + if os.path.isfile(local_filename) is True: + filename_parts = local_filename.rsplit('.', 1) + new_name = "{name}_{append_str}.{ext}"\ + .format(name=filename_parts[0], + append_str=append_str, + ext=filename_parts[1]) + os.rename(local_filename, new_name) + logger.info("Renamed older file to {new_name}" + .format(new_name=new_name)) + def _process_product(self, order_id, bundle_title, product): product_title = _clean_name(product['human_name']) # Get all types of download for a product @@ -71,7 +186,6 @@ class DownloadLibrary: product_folder = os.path.join( self.library_path, bundle_title, product_title ) - # Create directory to save the files to try: os.makedirs(product_folder) # noqa: E701 except OSError: pass # noqa: E701 @@ -95,58 +209,33 @@ class DownloadLibrary: continue local_filename = os.path.join(product_folder, url_filename) + cache_file_info = self.cache_data.get(cache_file_key, {}) + + if cache_file_info != {} and self.update is not True: + # Do not care about checking for updates at this time + continue + product_r = requests.get(url, stream=True) logger.debug("Item request: {product_r}, Url: {url}" .format(product_r=product_r, url=url)) - # Not sure which value will be best to use, so use them all file_info = { 'url_last_modified': product_r.headers['Last-Modified'], } - cache_file_info = self.cache_data.get(cache_file_key, {}) - if file_info != cache_file_info: - try: - # Check if older file exists, if so rename - if (os.path.isfile(local_filename) is True - and 'url_last_modified' in cache_file_info): - filename_parts = local_filename.rsplit('.', 1) - last_modified = datetime.datetime.strptime( - cache_file_info['url_last_modified'], - '%a, %d %b %Y %H:%M:%S %Z' - ).strftime('%Y-%m-%d') - new_name = "{name}_{date}.{ext}"\ - .format(name=filename_parts[0], - date=last_modified, - ext=filename_parts[1]) - os.rename(local_filename, new_name) - - self._download_file(product_r, local_filename) - - except (Exception, KeyboardInterrupt) as e: - if self.progress_bar: - # Do not overwrite the progress bar on next print - print() - logger.error("Failed to download file {product_title}/{url_filename}" # noqa: E501 - .format(product_title=product_title, - url_filename=url_filename)) - - # Clean up broken downloaded file - try: os.remove(local_filename) # noqa: E701 - except OSError: pass # noqa: E701 - - if type(e).__name__ == 'KeyboardInterrupt': - sys.exit() - else: - continue - + if file_info['url_last_modified'] != cache_file_info.get('url_last_modified'): # noqa: E501 + if 'url_last_modified' in cache_file_info: + last_modified = datetime.datetime.strptime( + cache_file_info['url_last_modified'], + '%a, %d %b %Y %H:%M:%S %Z' + ).strftime('%Y-%m-%d') else: - if self.progress_bar: - # Do not overwrite the progress bar on next print - print() - self._update_cache_data(cache_file_key, file_info) - - finally: - # Since its a stream connection, make sure to close it - product_r.connection.close() + last_modified = None + self._process_download( + product_r, + cache_file_key, + file_info, + local_filename, + rename_str=last_modified, + ) def _update_cache_data(self, cache_file_key, file_info): self.cache_data[cache_file_key] = file_info @@ -160,6 +249,38 @@ class DownloadLibrary: sort_keys=True, indent=4, ) + def _process_download(self, open_r, cache_file_key, file_info, + local_filename, rename_str=None): + try: + if rename_str: + self._rename_old_file(local_filename, rename_str) + + self._download_file(open_r, local_filename) + + except (Exception, KeyboardInterrupt) as e: + if self.progress_bar: + # Do not overwrite the progress bar on next print + print() + logger.error("Failed to download file {local_filename}" + .format(local_filename=local_filename)) + + # Clean up broken downloaded file + try: os.remove(local_filename) # noqa: E701 + except OSError: pass # noqa: E701 + + if type(e).__name__ == 'KeyboardInterrupt': + sys.exit() + + else: + if self.progress_bar: + # Do not overwrite the progress bar on next print + print() + self._update_cache_data(cache_file_key, file_info) + + finally: + # Since its a stream connection, make sure to close it + open_r.connection.close() + def _download_file(self, product_r, local_filename): logger.info("Downloading: {local_filename}" .format(local_filename=local_filename)) @@ -191,17 +312,6 @@ class DownloadLibrary: except FileNotFoundError: cache_data = {} - # Remove md5 & sha1 keys from legacy cache data - for key, value in cache_data.items(): - if 'md5' in value: - del cache_data[key]['md5'] - if 'sha1' in value: - del cache_data[key]['sha1'] - if 'url_etag' in value: - del cache_data[key]['url_etag'] - if 'url_crc' in value: - del cache_data[key]['url_crc'] - return cache_data def _get_purchase_keys(self):