itch-dl/downloader.py

#!/usr/bin/env python3
# Python 3.9+ (for built-in generic type hints) and dependencies listed below required.
import os
import sys
import json
import shutil
import argparse
import traceback
import subprocess
from enum import Enum
from typing import Optional

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from slugify import slugify

WGET_PATH = shutil.which("wget")
if WGET_PATH is None:
    print("Warning: wget not available, site mirroring will not work!")

# Try to download all site assets, images etc. included.
# You probably don't want this, but here you go!
PEDANTIC_MIRRORING = False

ITCH_API = "https://api.itch.io"


class ItchDownloadResult(Enum):
    SUCCESS = 0
    FAILURE = 1
    MISSING_DOWNLOAD = 2
    DOWNLOAD_TIMEOUT = 3


class ItchDownloadError(Exception):
    pass


class ItchApiClient:
    def __init__(self, base_url: str, api_key: str):
        self.base_url = base_url
        self.api_key = api_key

        self.requests = requests.Session()

        retry_strategy = Retry(
            total=5,
            backoff_factor=10,
            allowed_methods=["HEAD", "GET"],
            status_forcelist=[429, 500, 502, 503, 504],
        )

        # No timeouts - set them explicitly on API calls below!
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.requests.mount("https://", adapter)
        self.requests.mount("http://", adapter)

    def add_api_key(self, kwargs):
        # Adds the API key to request params, if one was not
        # already provided outside of the client.
        if 'data' in kwargs:
            params = kwargs['data']
        else:
            params = {}
            kwargs['data'] = params

        if 'api_key' not in params:
            params['api_key'] = self.api_key

    def get(self, endpoint: str, *args, **kwargs):
        self.add_api_key(kwargs)
        return self.requests.get(self.base_url + endpoint, *args, **kwargs)
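

# A minimal usage sketch for the client above (the key is a placeholder):
#   client = ItchApiClient(ITCH_API, "your-api-key")
#   r = client.get("/profile", timeout=15)
# The API key is injected into the request data automatically, so callers
# only pass the endpoint and a per-request timeout.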


def download_file(client: ItchApiClient, upload_id: int, download_path: str, print_url: bool = False):
    # No timeouts, chunked uploads, default retry strategy - should be all good?
    try:
        with client.get(f"/uploads/{upload_id}/download", stream=True) as r:
            r.raise_for_status()

            if print_url:
                print(f"Download URL: {r.url}")

            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1048576):  # 1MB chunks
                    f.write(chunk)
    except requests.exceptions.HTTPError as e:
        raise ItchDownloadError(f"Unrecoverable download error: {e}")
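
# Hypothetical example call (the upload ID and path are made up):
#   download_file(client, 12345, "/tmp/some-game.zip")
# The response is streamed to disk in 1 MB chunks, so large files never
# need to fit into memory at once.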


def get_download_keys(client: ItchApiClient):
    print("Fetching all download keys...")
    download_keys = {}
    page = 1

    while True:
        print(f"Downloading page {page}...")
        try:
            r = client.get("/profile/owned-keys", data={"page": page}, timeout=15)
            r.raise_for_status()
        except Exception as e:
            print(f"Got error while fetching download keys: {e}")
            print("Let's just pretend this is enough and move on...")
            break

        data = r.json()
        if 'owned_keys' not in data:
            break  # Assuming we're out of keys already...

        for key in data['owned_keys']:
            download_keys[key['game_id']] = key['id']

        if len(data['owned_keys']) == data['per_page']:
            page += 1
        else:
            break

    print(f"Fetched {len(download_keys)} download keys.")
    return download_keys
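
# As used above, each owned-keys entry is expected to carry at least a
# download key 'id' and the 'game_id' it unlocks; pagination stops once a
# page comes back shorter than the server-reported 'per_page' count.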


def parse_jobs(jam_json: dict) -> list[tuple[int, str, str]]:
    if 'jam_games' not in jam_json:
        raise Exception("Provided JSON is not a valid itch.io jam JSON.")

    # Extract (id, title, url) triples from all the entries.
    return [(int(e['game']['id']), e['game']['title'], e['game']['url']) for e in jam_json['jam_games']]
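
# The entries file is expected to look roughly like this (abridged):
#   {"jam_games": [{"game": {"id": 123, "title": "...", "url": "https://..."}}, ...]}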


def download_jam(path_to_json: str, download_to: str, api_key: str, continue_from: Optional[int] = None):
    try:
        with open(path_to_json) as f:
            jam_json = json.load(f)
    except FileNotFoundError:
        print(f"File {path_to_json} not found.")
        return
    except json.decoder.JSONDecodeError:
        print("Provided entries file is not a valid JSON file.")
        return

    client = ItchApiClient(ITCH_API, api_key)

    # Check API key validity:
    profile_req = client.get("/profile")
    if not profile_req.ok:
        print(f"Provided API key appears to be invalid: {profile_req.text}")
        sys.exit(1)

    jobs = parse_jobs(jam_json)
    jobs_successful = []
    jobs_failed = []

    download_keys = get_download_keys(client)

    game_id_to_meta = {}  # dict[game_id: int, (title: str, url: str)]
    for game_id, title, url in jobs:
        game_id_to_meta[game_id] = (title, url)

    failed_game_ids = []  # list of (game_id, error message) tuples

    # No "continue from"? Yep, start right away.
    should_process_jobs = continue_from is None

    for game_id, title, url in jobs:
        label = f"{title} ({game_id})"
        if not should_process_jobs:
            if game_id == continue_from:
                should_process_jobs = True
            else:
                continue

        try:
            download_path = os.path.join(download_to, slugify(title))
            if PEDANTIC_MIRRORING:
                site_mirror_path = os.path.join(download_to, "_sites")
            else:
                site_mirror_path = os.path.join(download_path, "site")
            os.makedirs(download_path, exist_ok=True)
            os.makedirs(site_mirror_path, exist_ok=True)
        except OSError as e:
            raise ItchDownloadError(f"Could not create download directory: {e}")

        print(f"Trying to download {label} to {download_path}")

        if WGET_PATH is not None:
            print("Downloading site...")
            if PEDANTIC_MIRRORING:
                extra_wget_args = [
                    "--timestamping",
                    "--span-hosts",
                    "--convert-links",
                    "--adjust-extension",
                    "--page-requisites",
                ]
            else:
                extra_wget_args = []

            wget = subprocess.run([
                WGET_PATH,
                *extra_wget_args,
                "--quiet",
                url
            ], cwd=site_mirror_path)

            if wget.returncode != 0:
                print("Warning: Site mirroring failed/incomplete.")

        creds = {}
        if game_id in download_keys:
            creds['download_key_id'] = download_keys[game_id]
            print(f"Using {creds} for private uploads")

        game_uploads_req = client.get(f"/games/{game_id}/uploads", data=creds, timeout=15)
        if not game_uploads_req.ok:
            raise ItchDownloadError(f"Could not fetch game uploads for {label}: {game_uploads_req.text}")

        game_uploads = game_uploads_req.json()['uploads']
        print(f"Found {len(game_uploads)} upload(s)")

        try:
            for upload in game_uploads:
                upload_id = upload['id']
                file_name = upload['filename']
                file_size = upload['size']
                upload_is_external = upload['storage'] == 'external'

                print(f"Downloading '{file_name}' ({upload_id}), {file_size} bytes...")
                if upload_is_external:
                    print("***********************************************************")
                    print("*                                                         *")
                    print("* WARNING: External storage - downloads will likely fail. *")
                    print("*          Check the URL displayed below manually!        *")
                    print("*                                                         *")
                    print("***********************************************************")

                target_path = os.path.join(download_path, file_name)
                try:
                    download_file(client, upload_id, target_path, print_url=upload_is_external)
                except ItchDownloadError as e:
                    jobs_failed.append((game_id, file_name, str(e)))
                    print(f"Download failed for {file_name}: {e}")
                    continue

                try:
                    actual_file_size = os.stat(target_path).st_size
                    if actual_file_size == file_size:
                        jobs_successful.append((game_id, file_name))
                    else:
                        jobs_failed.append((game_id, file_name, f"File size is {actual_file_size}, expected {file_size}"))
                except FileNotFoundError:
                    jobs_failed.append((game_id, file_name, "Could not download file"))

            print(f"Done downloading {label}")
        except ItchDownloadError as e:
            failed_game_ids.append((game_id, str(e)))
            print(f"Download failed for {label}: {e}")
            continue
        except Exception as e:
            print(f"Critical error while downloading {label}: {e}")
            failed_game_ids.append((game_id, str(e)))
            traceback.print_exc()
            continue

    successful_titles = {}
    for game_id, file_name in jobs_successful:
        successful_titles.setdefault(game_id, []).append(file_name)

    if any(successful_titles):
        print(f"\nAll done, downloaded files for {len(successful_titles)} title(s):")
        for game_id, files in successful_titles.items():
            print(f"{game_id_to_meta[game_id][0]}, {len(files)} file(s)")

    if any(jobs_failed):
        print(f"\nDownloads failed for {len(jobs_failed)} file(s):")
        for game_id, file_name, message in jobs_failed:
            title, url = game_id_to_meta[game_id]
            print(f"{title} - {file_name} - {message}")
            print(f"Title URL: {url}")

    if any(failed_game_ids):
        print(f"\nCompletely failed downloads for {len(failed_game_ids)} title(s):")
        for game_id, message in failed_game_ids:
            title, url = game_id_to_meta[game_id]
            print(f"{title} ({game_id}) - {url} - {message}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Downloads games from public Itch.io game jams.")
    parser.add_argument("entries", help="path to the game jam entries.json file")
    parser.add_argument("--api-key", metavar="key", required=True, help="itch.io API key from https://itch.io/user/settings/api-keys")
    parser.add_argument("--download-to", metavar="path", help="directory to save results into (default: current dir)")
    parser.add_argument("--continue-from", metavar="id", help="skip all entries until the provided entry ID is found")
    args = parser.parse_args()

    continue_id = args.continue_from
    if continue_id is not None:
        try:
            continue_id = int(continue_id)
        except ValueError:
            print("ID to continue from must be an integer.")
            sys.exit(1)

    download_to = os.getcwd()
    if args.download_to is not None:
        download_to = os.path.normpath(args.download_to)
        os.makedirs(download_to, exist_ok=True)

    download_jam(args.entries, download_to, args.api_key, continue_from=continue_id)
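
    # Example invocation (paths and IDs are illustrative):
    #   python3 downloader.py entries.json --api-key YOUR_KEY \
    #       --download-to ./jam-downloads --continue-from 123456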