Mirror of https://github.com/DragoonAethis/itch-dl.git
synced 2024-12-21 02:21:52 +01:00
4627a863d8
Script now accepts either a JSON file path or a jam URL, in which case it downloads the page, extracts the jam ID, downloads the entries JSON and grabs all titles accordingly.
350 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
# Python 3.9+ (for builtin generics in annotations, e.g. list[tuple[...]])
# and the dependencies listed below required.

import os
import re
import sys
import json
import time
import shutil
import hashlib
import argparse
import traceback
import subprocess
from enum import Enum

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from slugify import slugify

WGET_PATH = shutil.which("wget")
if WGET_PATH is None:
    print("Warning: wget not available, site mirroring will not work!")

# Try to download all site assets, images etc. included.
# You probably don't want this, but here you go!
PEDANTIC_MIRRORING = False

ITCH_API = "https://api.itch.io"


class ItchDownloadResult(Enum):
    SUCCESS = 0
    FAILURE = 1
    MISSING_DOWNLOAD = 2
    DOWNLOAD_TIMEOUT = 3


class ItchDownloadError(Exception):
    pass


class ItchApiClient:
    def __init__(self, base_url: str, api_key: str):
        self.base_url = base_url
        self.api_key = api_key

        self.requests = requests.Session()

        retry_strategy = Retry(
            total=5,
            backoff_factor=10,
            allowed_methods=["HEAD", "GET"],
            status_forcelist=[429, 500, 502, 503, 504]
        )

        # No timeouts - set them explicitly on API calls below!
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.requests.mount("https://", adapter)
        self.requests.mount("http://", adapter)

    def add_api_key(self, kwargs):
        # Adds the API key to request params, if one was not
        # already provided outside of the client.
        if 'data' in kwargs:
            params = kwargs['data']
        else:
            params = {}
            kwargs['data'] = params

        if 'api_key' not in params:
            params['api_key'] = self.api_key

    def get(self, endpoint: str, *args, **kwargs):
        self.add_api_key(kwargs)
        return self.requests.get(self.base_url + endpoint, *args, **kwargs)

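
# A minimal usage sketch (hypothetical key value), based on the class above:
# every call goes through the retrying session and gets the API key injected
# into its form data automatically.
#
#   client = ItchApiClient(ITCH_API, "xxxx")
#   r = client.get("/profile", timeout=15)  # GET https://api.itch.io/profile
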
def download_file(client: ItchApiClient, upload_id: int, download_path: str, creds: dict, print_url: bool = False):
    # No timeouts, chunked uploads, default retry strategy, should be all good?
    try:
        with client.get(f"/uploads/{upload_id}/download", data=creds, stream=True) as r:
            r.raise_for_status()
            if print_url:
                print(f"Download URL: {r.url}")

            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1048576):  # 1MB chunks
                    f.write(chunk)
    except requests.exceptions.HTTPError as e:
        raise ItchDownloadError(f"Unrecoverable download error: {e}")


def get_download_keys(client: ItchApiClient):
    print("Fetching all download keys...")
    download_keys = {}
    page = 1

    while True:
        print(f"Downloading page {page}...")
        try:
            r = client.get("/profile/owned-keys", data={"page": page}, timeout=15)
            r.raise_for_status()
        except Exception as e:
            print(f"Got error while fetching download keys: {e}")
            print("Let's just pretend this is enough and move on...")
            break

        data = r.json()
        if 'owned_keys' not in data:
            break  # Assuming we're out of keys already...

        for key in data['owned_keys']:
            download_keys[key['game_id']] = key['id']

        if len(data['owned_keys']) == data['per_page']:
            page += 1
        else:
            break

    print(f"Fetched {len(download_keys)} download keys.")
    return download_keys

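
# The pagination above assumes a response shape roughly like this (inferred
# from the parsing logic, not a documented contract) - a page shorter than
# per_page means we're done:
#
#   {"page": 1, "per_page": 50, "owned_keys": [
#       {"id": 1001, "game_id": 2002, ...},
#       ...
#   ]}
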
def parse_jobs(jam_json: dict) -> list[tuple[int, str, str]]:
    if 'jam_games' not in jam_json:
        raise Exception("Provided JSON is not a valid itch.io jam JSON.")

    # Extract (id, title, url) triples from all the entries.
    return [(int(e['game']['id']), e['game']['title'], e['game']['url']) for e in jam_json['jam_games']]

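
# For reference, the entries JSON is assumed to look roughly like this (only
# the fields used above are shown):
#
#   {"jam_games": [
#       {"game": {"id": 123, "title": "Some Game", "url": "https://someone.itch.io/some-game"}},
#       ...
#   ]}
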
def get_game_jam_json(jam_path: str) -> dict:
    # Do we have a URL?
    jam_path = jam_path.strip()
    if jam_path.startswith("https://") or jam_path.startswith("http://"):
        r = requests.get(jam_path)
        if not r.ok:
            raise Exception(f"Could not download game jam site from {jam_path} (code {r.status_code}): {r.reason}")

        jam_id_line = None
        for line in r.text.splitlines():
            if "ViewJam" in line:
                jam_id_line = line

        if jam_id_line is None:
            raise Exception("Jam site did not contain the ID line - please provide the path to the game jam entries JSON file instead.")

        found_ids = re.findall(r'"id":([0-9]+)', jam_id_line)
        if len(found_ids) == 0:
            raise Exception("Could not extract the jam ID from the provided site.")

        jam_id = int(found_ids[0])  # Always grab the first one for now...
        print(f"Extracted jam ID: {jam_id}")

        r = requests.get(f"https://itch.io/jam/{jam_id}/entries.json")
        if not r.ok:
            raise Exception("Could not download the game jam entries list.")

        content = r.text
    elif os.path.isfile(jam_path):
        try:
            with open(jam_path) as f:
                content = f.read()
        except Exception as e:
            raise Exception(f"Could not open/read the game jam entries file: {e}")
    else:
        raise Exception("Provided game jam path is invalid (not a link/existing file).")

    try:
        jam_json = json.loads(content)
    except json.decoder.JSONDecodeError:
        # Without valid JSON there is nothing to work with - bail out here
        # instead of returning an unbound variable.
        raise Exception("Provided game jam entries file is not a valid JSON file.")

    return jam_json

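
# Both input styles are accepted here - for example (hypothetical values):
#
#   get_game_jam_json("https://itch.io/jam/example-jam")  # scrape the jam ID, fetch entries.json
#   get_game_jam_json("./entries.json")                   # read a local file
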
def download_jam(jam_path: str, download_to: str, api_key: str, continue_from: int = None):
    client = ItchApiClient(ITCH_API, api_key)
    jam_json = get_game_jam_json(jam_path)

    # Check API key validity:
    profile_req = client.get("/profile")
    if not profile_req.ok:
        print(f"Provided API key appears to be invalid: {profile_req.text}")
        exit(1)

    jobs = parse_jobs(jam_json)
    jobs_successful = []
    jobs_failed = []

    download_keys = get_download_keys(client)
    game_id_to_meta = {}  # dict[game_id: int, (title: str, url: str)]

    for game_id, title, url in jobs:
        game_id_to_meta[game_id] = (title, url)

    failed_game_ids = []  # (game_id, message) pairs - the .append() calls below need a list, not a set

    # No "continue from"? Yep, start right away.
    should_process_jobs = continue_from is None

    for game_id, title, url in jobs:
        label = f"{title} ({game_id})"
        if not should_process_jobs:
            if game_id == continue_from:
                should_process_jobs = True
            else:
                continue

        try:
            download_path = os.path.join(download_to, slugify(title))
            if PEDANTIC_MIRRORING:
                site_mirror_path = os.path.join(download_to, "_sites")
            else:
                site_mirror_path = os.path.join(download_path, "site")
            os.makedirs(download_path, exist_ok=True)
            os.makedirs(site_mirror_path, exist_ok=True)
        except OSError:
            raise ItchDownloadError(f"Could not create download directory: {download_path}")

        print(f"Trying to download {label} to {download_path}")

        if WGET_PATH is not None:
            print("Downloading site...")
            if PEDANTIC_MIRRORING:
                extra_wget_args = [
                    "--timestamping",
                    "--span-hosts",
                    "--convert-links",
                    "--adjust-extension",
                    "--page-requisites",
                ]
            else:
                extra_wget_args = []

            wget = subprocess.run([
                WGET_PATH,
                *extra_wget_args,
                "--quiet",
                url
            ], cwd=site_mirror_path)

            if wget.returncode != 0:
                print("Warning: Site mirroring failed/incomplete.")

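        # For reference: with PEDANTIC_MIRRORING off, the subprocess call above
        # boils down to running `wget --quiet <game URL>` inside the per-game
        # site directory.
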
        creds = {}
        if game_id in download_keys:
            creds['download_key_id'] = download_keys[game_id]
            print(f"Using {creds} for private uploads")

        try:
            # Fetch the uploads list inside the try block, so a single broken
            # title is logged and skipped instead of aborting the entire jam.
            game_uploads_req = client.get(f"/games/{game_id}/uploads", data=creds, timeout=15)
            if not game_uploads_req.ok:
                raise ItchDownloadError(f"Could not fetch game uploads for {label}: {game_uploads_req.text}")

            game_uploads = game_uploads_req.json()['uploads']
            print(f"Found {len(game_uploads)} upload(s)")

            for upload in game_uploads:
                upload_id = upload['id']
                file_name = upload['filename']
                file_size = upload['size']
                upload_is_external = upload['storage'] == 'external'

                print(f"Downloading '{file_name}' ({upload_id}), {file_size} bytes...")
                if upload_is_external:
                    print("***********************************************************")
                    print("*                                                         *")
                    print("* WARNING: External storage - downloads will likely fail. *")
                    print("*          Check the URL displayed below manually!        *")
                    print("*                                                         *")
                    print("***********************************************************")

                target_path = os.path.join(download_path, file_name)
                try:
                    download_file(client, upload_id, target_path, creds, print_url=upload_is_external)
                except ItchDownloadError as e:
                    jobs_failed.append((game_id, file_name, str(e)))
                    print(f"Download failed for {file_name}: {e}")
                    continue

                try:
                    actual_file_size = os.stat(target_path).st_size
                    if actual_file_size == file_size:
                        jobs_successful.append((game_id, file_name))
                    else:
                        jobs_failed.append((game_id, file_name, f"File size is {actual_file_size}, expected {file_size}"))
                except FileNotFoundError:
                    jobs_failed.append((game_id, file_name, "Could not download file"))

            print(f"Done downloading {label}")
        except ItchDownloadError as e:
            failed_game_ids.append((game_id, str(e)))
            print(f"Downloading {label} failed: {e}")
            continue
        except Exception as e:
            print(f"Critical error while downloading {label}: {e}")
            failed_game_ids.append((game_id, str(e)))
            traceback.print_exc()
            continue

    successful_titles = {}
    for game_id, file_name in jobs_successful:
        if game_id not in successful_titles:
            successful_titles[game_id] = [file_name]
        else:
            successful_titles[game_id].append(file_name)

    if any(successful_titles):
        print(f"\nAll done, downloaded files for {len(successful_titles)} title(s):")
        for game_id, files in successful_titles.items():
            print(f"{game_id_to_meta[game_id][0]}, {len(files)} file(s)")

    if any(jobs_failed):
        print(f"\nDownloads failed for {len(jobs_failed)} file(s):")
        for game_id, file_name, message in jobs_failed:
            title, url = game_id_to_meta[game_id]
            print(f"{title} - {file_name} - {message}")
            print(f"Title URL: {url}")

    if any(failed_game_ids):
        print(f"\nCompletely failed downloads for {len(failed_game_ids)} title(s):")
        for game_id, message in failed_game_ids:
            title, url = game_id_to_meta[game_id]
            print(f"{title} ({game_id}) - {url} - {message}")


def get_parser():
    parser = argparse.ArgumentParser(description="Downloads games from public Itch.io game jams.")
    parser.add_argument("entries", help="path to the game jam entries.json file, or the jam URL")
    parser.add_argument("--api-key", metavar="key", required=True, help="itch.io API key from https://itch.io/user/settings/api-keys")
    parser.add_argument("--download-to", metavar="path", help="directory to save results into (default: current dir)")
    parser.add_argument("--continue-from", metavar="id", type=int, help="skip all entries until the provided entry ID is found")
    return parser

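
# Example invocation (hypothetical script name, API key and jam URL):
#
#   python3 jam-downloader.py "https://itch.io/jam/example-jam" \
#       --api-key XXXXXXXX --download-to ./downloads
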
def get_download_dir(args: argparse.Namespace) -> str:
    download_to = os.getcwd()
    if args.download_to is not None:
        download_to = os.path.normpath(args.download_to)
        os.makedirs(download_to, exist_ok=True)  # Don't die if the directory already exists.

    return download_to


if __name__ == "__main__":
    args = get_parser().parse_args()
    download_to = get_download_dir(args)
    download_jam(args.entries, download_to, args.api_key, continue_from=args.continue_from)