humblebundle-downloader/humblebundle_downloader/download_library.py

import os
import json
import parsel
import logging
import requests

logger = logging.getLogger(__name__)


def _clean_name(dirty_str):
    allowed_chars = (' ', '_', '.', '-', '[', ']')
    clean = []
    for c in dirty_str.replace('+', '_').replace(':', ' -'):
        if c.isalpha() or c.isdigit() or c in allowed_chars:
            clean.append(c)

    return "".join(clean).strip()


def _save_cache(cache_file, cache_data):
    """Write ``cache_data`` to ``cache_file`` as JSON without corrupting it.

    The data is written to a sibling temporary file first and then moved
    over the real cache file, so an interrupted run never leaves a
    half-written (unparsable) cache behind.
    """
    tmp_file = cache_file + '.tmp'
    with open(tmp_file, 'w') as outfile:
        json.dump(
            cache_data, outfile,
            sort_keys=True, indent=4,
        )
    os.replace(tmp_file, cache_file)


def _write_response_to_file(response, filename, item_title, url_filename,
                            progress_bar):
    """Stream the body of ``response`` into ``filename``.

    The body is always written in chunks so large files are never held in
    memory. When ``progress_bar`` is true and the server sent a non-zero
    ``content-length``, a textual progress bar is redrawn on stdout after
    every chunk.
    """
    pb_width = 50
    total_length = response.headers.get('content-length')
    if total_length is not None:
        total_length = int(total_length)

    dl = 0
    with open(filename, 'wb') as outfile:
        for data in response.iter_content(chunk_size=4096):
            dl += len(data)
            outfile.write(data)
            # No (or zero) content-length: nothing to compute a ratio from
            if not (progress_bar and total_length):
                continue
            # Clamp: the decoded body can be larger than content-length
            # when the transfer is compressed.
            done = min(pb_width, int(pb_width * dl / total_length))
            print("Downloading: {item_title}/{url_filename}: {percent}% [{filler}{space}]"  # noqa E501
                  .format(item_title=item_title,
                          url_filename=url_filename,
                          percent=int(done * (100 / pb_width)),
                          filler='=' * done,
                          space=' ' * (pb_width - done),
                          ), end='\r')

    if progress_bar:
        # print new line so next progress bar is on its own line
        print()


def download_library(cookie_path, library_path, progress_bar=False,
                     ext_include=None, ext_exclude=None, platform_include=None,
                     purchase_keys=None):
    """Download the files of a Humble Bundle library into ``library_path``.

    Files are stored as ``<library_path>/<bundle>/<item>/<file>``. A
    ``.cache.json`` file inside ``library_path`` records metadata for each
    downloaded file; a file is fetched again only when that metadata
    differs from what the server currently reports.

    Args:
        cookie_path: Path to a text file whose content is sent verbatim as
            the ``cookie`` request header.
        library_path: Directory the library is downloaded into.
        progress_bar: When true, draw a progress bar on stdout instead of
            logging each download.
        ext_include: If non-empty, only download files whose extension
            (case-insensitive) is in this list.
        ext_exclude: If non-empty, skip files whose extension
            (case-insensitive) is in this list.
        platform_include: If non-empty, only download these platforms
            (case-insensitive). ``None`` or a list containing ``all``
            disables the filter.
        purchase_keys: Order keys to download. When empty or ``None`` the
            keys are read from the account's library page.
    """
    ext_include = [ext.lower() for ext in ext_include or []]
    ext_exclude = [ext.lower() for ext in ext_exclude or []]

    platform_include = [p.lower() for p in platform_include or []]
    if 'all' in platform_include:
        # if all then we do not even need to use this feature
        platform_include = []

    # Load cookies. Strip surrounding whitespace: a trailing newline in
    # the file would otherwise produce an invalid header value.
    with open(cookie_path, 'r') as f:
        account_cookies = f.read().strip()

    cache_file = os.path.join(library_path, '.cache.json')

    try:
        with open(cache_file, 'r') as f:
            cache_data = json.load(f)
    except FileNotFoundError:
        cache_data = {}

    if not purchase_keys:
        library_r = requests.get('https://www.humblebundle.com/home/library',
                                 headers={'cookie': account_cookies})
        logger.debug("Library request: %s", library_r)
        library_page = parsel.Selector(text=library_r.text)
        orders_raw = (library_page.css('#user-home-json-data')
                                  .xpath('string()').extract_first())
        if orders_raw is None:
            # The page did not contain the expected data element
            logger.error("Could not find the library data on the library "
                         "page (response: %s)", library_r)
            return
        purchase_keys = json.loads(orders_raw)['gamekeys']

    for order_id in purchase_keys:
        order_url = 'https://www.humblebundle.com/api/v1/order/{order_id}?all_tpkds=true'.format(order_id=order_id)  # noqa: E501
        order_r = requests.get(order_url,
                               headers={'cookie': account_cookies})
        logger.debug("Order request: %s", order_r)
        if not order_r.ok:
            # One bad key should not abort the remaining orders
            logger.error("Order request failed for %s: %s",
                         order_id, order_r)
            continue
        order = order_r.json()
        bundle_title = _clean_name(order['product']['human_name'])
        logger.info("Checking bundle: %s", bundle_title)
        for item in order['subproducts']:
            item_title = _clean_name(item['human_name'])
            # Get all types of download for a product
            for download_type in item['downloads']:
                # Type of product, ebook, videos, etc...
                platform = download_type['platform'].lower()
                if platform_include and platform not in platform_include:
                    logger.info("Do not want %s, Skipping %s",
                                platform, item_title)
                    continue

                item_folder = os.path.join(
                    library_path, bundle_title, item_title
                )

                # Create directory to save the files to
                os.makedirs(item_folder, exist_ok=True)

                # Download each file type of a product
                for file_type in download_type['download_struct']:
                    try:
                        url = file_type['url']['web']
                    except KeyError:
                        logger.info("No url found for %s/%s",
                                    bundle_title, item_title)
                        continue

                    url_filename = url.split('?')[0].split('/')[-1]
                    cache_file_key = order_id + ':' + url_filename
                    ext = url_filename.split('.')[-1].lower()
                    # Only get the file types we care about
                    if ((ext_include and ext not in ext_include)
                            or (ext_exclude and ext in ext_exclude)):
                        logger.info("Skipping the file %s", url_filename)
                        continue

                    filename = os.path.join(item_folder, url_filename)
                    item_r = requests.get(url, stream=True)
                    try:
                        logger.debug("Item request: %s, Url: %s", item_r, url)
                        if not item_r.ok:
                            # Never save an error page as the file or
                            # record it in the cache as downloaded.
                            logger.error(
                                "Download request failed for %s/%s: %s",
                                item_title, url_filename, item_r)
                            continue

                        etag = item_r.headers.get('ETag')
                        # Not sure which value will be best to use, so use
                        # them all. Missing headers are stored as None.
                        file_info = {
                            'md5': file_type.get('md5'),
                            'sha1': file_type.get('sha1'),
                            'url_last_modified':
                                item_r.headers.get('Last-Modified'),
                            # Drop the surrounding quotes of the ETag value
                            'url_etag':
                                etag[1:-1] if etag is not None else None,
                            'url_crc': item_r.headers.get('X-HW-Cache-CRC'),
                        }
                        if file_info == cache_data.get(cache_file_key, {}):
                            # Already have the current version of this file
                            continue

                        if not progress_bar:
                            logger.info("Downloading: %s/%s",
                                        item_title, url_filename)

                        _write_response_to_file(
                            item_r, filename, item_title, url_filename,
                            progress_bar,
                        )
                    finally:
                        # Streamed responses hold their connection open
                        # until read fully or closed explicitly.
                        item_r.close()

                    cache_data[cache_file_key] = file_info
                    # Update cache file with newest data so if the script
                    # quits it can keep track of the progress
                    _save_cache(cache_file, cache_data)
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`import os`
			`import json`
			`import parsel`
			`import logging`
			`import requests`

			`logger = logging.getLogger(__name__)`


			`def _clean_name(dirty_str):`
Fixed a windows filename bug. Fixed bug where item does not have a web url 2020-01-20 02:31:00 +01:00			`allowed_chars = (' ', '_', '.', '-', '[', ']')`
flake8 clean up 2020-01-19 15:40:41 +01:00			`clean = []`
Fixed a windows filename bug. Fixed bug where item does not have a web url 2020-01-20 02:31:00 +01:00			`for c in dirty_str.replace('+', '_').replace(':', ' -'):`
flake8 clean up 2020-01-19 15:40:41 +01:00			`if c.isalpha() or c.isdigit() or c in allowed_chars:`
			`clean.append(c)`

Typos and syntax issues 2020-01-20 00:43:35 +01:00			`return "".join(clean).strip()`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00

Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00			`def download_library(cookie_path, library_path, progress_bar=False,`
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`ext_include=None, ext_exclude=None, platform_include=None,`
			`purchase_keys=None):`
Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00			`if ext_include is None:`
			`ext_include = []`
Fixed #6. Support filtering by platform 2020-01-20 05:11:35 +01:00			`ext_include = list(map(str.lower, ext_include))`

Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00			`if ext_exclude is None:`
			`ext_exclude = []`
Fixed #6. Support filtering by platform 2020-01-20 05:11:35 +01:00			`ext_exclude = list(map(str.lower, ext_exclude))`

			`if platform_include is None or 'all' in platform_include:`
			`# if all then we do not even need to use this feature`
			`platform_include = []`
			`platform_include = list(map(str.lower, platform_include))`
Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`# Load cookies`
			`with open(cookie_path, 'r') as f:`
			`account_cookies = f.read()`

			`cache_file = os.path.join(library_path, '.cache.json')`

			`try:`
			`with open(cache_file, 'r') as f:`
			`cache_data = json.load(f)`
			`except FileNotFoundError:`
			`cache_data = {}`

Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`if not purchase_keys:`
			`library_r = requests.get('https://www.humblebundle.com/home/library',`
			`headers={'cookie': account_cookies})`
			`logger.debug("Library request: " + str(library_r))`
			`library_page = parsel.Selector(text=library_r.text)`
			`orders_json = json.loads(library_page.css('#user-home-json-data')`
			`.xpath('string()').extract_first())`
			`purchase_keys = orders_json['gamekeys']`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`for order_id in purchase_keys:`
0.0.6 2020-01-20 00:39:28 +01:00			`order_url = 'https://www.humblebundle.com/api/v1/order/{order_id}?all_tpkds=true'.format(order_id=order_id) # noqa: E501`
flake8 clean up 2020-01-19 15:40:41 +01:00			`order_r = requests.get(order_url,`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`headers={'cookie': account_cookies})`
0.0.6 2020-01-20 00:39:28 +01:00			`logger.debug("Order request: " + str(order_r))`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`order = order_r.json()`
			`bundle_title = _clean_name(order['product']['human_name'])`
0.0.6 2020-01-20 00:39:28 +01:00			`logger.info("Checking bundle: " + str(bundle_title))`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`for item in order['subproducts']:`
			`item_title = _clean_name(item['human_name'])`
			`# Get all types of download for a product`
			`for download_type in item['downloads']:`
flake8 clean up 2020-01-19 15:40:41 +01:00			`# Type of product, ebook, videos, etc...`
Fixed #6. Support filtering by platform 2020-01-20 05:11:35 +01:00
			`platform = download_type['platform'].lower()`
			`if (platform_include and platform not in platform_include):`
			`logger.info("Do not want " + platform`
			`+ ", Skipping " + item_title)`
			`continue`

flake8 clean up 2020-01-19 15:40:41 +01:00			`item_folder = os.path.join(`
			`library_path, bundle_title, item_title`
			`)`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00
			`# Create directory to save the files to`
			`try: os.makedirs(item_folder) # noqa: E701`
			`except OSError: pass # noqa: E701`

			`# Download each file type of a product`
			`for file_type in download_type['download_struct']:`
Fixed a windows filename bug. Fixed bug where item does not have a web url 2020-01-20 02:31:00 +01:00			`try:`
			`url = file_type['url']['web']`
			`except KeyError:`
Fixed #6. Support filtering by platform 2020-01-20 05:11:35 +01:00			`logger.info("No url found for " + bundle_title`
			`+ "/" + item_title)`
Fixed a windows filename bug. Fixed bug where item does not have a web url 2020-01-20 02:31:00 +01:00			`continue`
Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`url_filename = url.split('?')[0].split('/')[-1]`
			`cache_file_key = order_id + ':' + url_filename`
			`ext = url_filename.split('.')[-1]`
Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00			`# Only get the file types we care about`
Fixed #6. Support filtering by platform 2020-01-20 05:11:35 +01:00			`if ((ext_include and ext.lower() not in ext_include)`
			`or (ext_exclude and ext.lower() in ext_exclude)):`
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`logger.info("Skipping the file " + url_filename)`
Fixed sso & 2fa cookie gen bug Fixed #3 & #8. Added file type filtering Fixed #2 2020-01-20 04:31:43 +01:00			`continue`

Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`filename = os.path.join(item_folder, url_filename)`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`item_r = requests.get(url, stream=True)`
0.0.6 2020-01-20 00:39:28 +01:00			`logger.debug("Item request: {item_r}, Url: {url}"`
			`.format(item_r=item_r, url=url))`
flake8 clean up 2020-01-19 15:40:41 +01:00			`# Not sure which value will be best to use, so use them all`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`file_info = {`
Not everything has the same hash options. Fixes #4 2020-01-19 19:37:22 +01:00			`'md5': file_type.get('md5'),`
			`'sha1': file_type.get('sha1'),`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`'url_last_modified': item_r.headers['Last-Modified'],`
			`'url_etag': item_r.headers['ETag'][1:-1],`
			`'url_crc': item_r.headers['X-HW-Cache-CRC'],`
			`}`
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`if file_info != cache_data.get(cache_file_key, {}):`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`if not progress_bar:`
flake8 fixs 2020-01-20 16:28:54 +01:00			`logger.info("Downloading: {item_title}/{url_filename}" # noqa: E501`
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`.format(item_title=item_title,`
			`url_filename=url_filename))`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00
			`with open(filename, 'wb') as outfile:`
			`total_length = item_r.headers.get('content-length')`
			`if total_length is None: # no content length header`
			`outfile.write(item_r.content)`
			`else:`
			`dl = 0`
			`total_length = int(total_length)`
flake8 clean up 2020-01-19 15:40:41 +01:00			`for data in item_r.iter_content(chunk_size=4096): # noqa E501`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`dl += len(data)`
			`outfile.write(data)`
			`pb_width = 50`
			`done = int(pb_width * dl / total_length)`
flake8 clean up 2020-01-19 15:40:41 +01:00			`if progress_bar:`
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`print("Downloading: {item_title}/{url_filename}: {percent}% [{filler}{space}]" # noqa E501`
			`.format(item_title=item_title,`
			`url_filename=url_filename,`
0.0.6 2020-01-20 00:39:28 +01:00			`percent=int(done * (100 / pb_width)), # noqa E501`
			`filler='=' * done,`
			`space=' ' * (pb_width - done), # noqa E501`
			`), end='\r')`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00
			`if progress_bar:`
flake8 clean up 2020-01-19 15:40:41 +01:00			`# print new line so next progress bar`
			`# is on its own line`
			`print()`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00
Fixed #5 & Fixed #9. Added --keys support for downloading a single bundle 2020-01-20 16:26:48 +01:00			`cache_data[cache_file_key] = file_info`
flake8 clean up 2020-01-19 15:40:41 +01:00			`# Update cache file with newest data so if the script`
			`# quits it can keep track of the progress`
Added ci for releasing new versions to pypi 2020-01-19 15:25:00 +01:00			`with open(cache_file, 'w') as outfile:`
flake8 clean up 2020-01-19 15:40:41 +01:00			`json.dump(`
			`cache_data, outfile,`
			`sort_keys=True, indent=4,`
			`)`