mirror of
https://github.com/DragoonAethis/itch-dl.git
synced 2024-12-21 02:21:52 +01:00
Implement screenshot/cover art downloads, initial infobox parsing
This commit is contained in:
parent
8a6bed69f4
commit
f5c0f4658d
@ -65,4 +65,4 @@ def run() -> int:
|
||||
# Grab all the download keys (there's no way to fetch them per title...):
|
||||
keys = get_download_keys(client)
|
||||
|
||||
return drive_downloads(jobs, download_to, args.api_key, keys, parallel=args.parallel)
|
||||
return drive_downloads(jobs, download_to, args.mirror_web, args.api_key, keys, parallel=args.parallel)
|
||||
|
@ -13,10 +13,11 @@ from tqdm.contrib.concurrent import thread_map
|
||||
from .api import ItchApiClient
|
||||
from .utils import ItchDownloadError, get_int_after_marker_in_json
|
||||
from .consts import ITCH_GAME_URL_REGEX
|
||||
|
||||
from .infobox import parse_infobox
|
||||
|
||||
TARGET_PATHS = {
|
||||
'site': 'site.html',
|
||||
'cover': 'cover',
|
||||
'metadata': 'metadata.json',
|
||||
'files': 'files',
|
||||
'screenshots': 'screenshots'
|
||||
@ -42,18 +43,21 @@ class GameMetadata(TypedDict, total=False):
|
||||
author: str
|
||||
author_url: str
|
||||
|
||||
description: str
|
||||
cover_url: str
|
||||
screenshots: List[str]
|
||||
description: str
|
||||
|
||||
created_at: str
|
||||
released_at: str
|
||||
published_at: str
|
||||
|
||||
|
||||
class GameDownloader:
|
||||
def __init__(self, download_to: str, api_key: str, keys: Dict[int, str]):
|
||||
def __init__(self, download_to: str, mirror_web: bool, api_key: str, keys: Dict[int, str]):
|
||||
self.download_to = download_to
|
||||
self.download_keys = keys
|
||||
self.mirror_web = mirror_web
|
||||
|
||||
self.download_keys = keys
|
||||
self.client = ItchApiClient(api_key)
|
||||
|
||||
def get_rating_json(self, site) -> Optional[dict]:
|
||||
@ -112,20 +116,32 @@ class GameDownloader:
|
||||
return game_id
|
||||
|
||||
def extract_metadata(self, game_id: int, url: str, site: BeautifulSoup) -> GameMetadata:
    """Build the GameMetadata record for a game from its parsed store page.

    The title comes from the page's rating JSON ("name" key) when that is
    available, otherwise from the <h1 class="game_title"> element. The
    description comes from the og:description meta tag, falling back to the
    plain "description" meta tag. Screenshot URLs are the hrefs of every <a>
    inside <div class="screenshot_list">, or an empty list when that div is
    absent.

    :param game_id: numeric game ID stored as-is in the result.
    :param url: game page URL stored as-is in the result.
    :param site: parsed game page.
    :return: metadata with game_id, title, url, cover_url, screenshots and
        description filled in.
    """
    # get_rating_json() is declared as returning Optional[dict], so guard the
    # lookup: without rating JSON we fall through to the <h1> title below
    # instead of raising AttributeError on None.
    rating_json: Optional[dict] = self.get_rating_json(site)
    title = rating_json.get("name") if rating_json else None

    description: Optional[str] = self.get_meta(site, property="og:description")
    if not description:
        description = self.get_meta(site, name="description")

    screenshot_urls: List[str] = []
    screenshots_node = site.find("div", class_="screenshot_list")
    if screenshots_node:
        screenshot_urls = [a['href'] for a in screenshots_node.find_all('a')]

    metadata = GameMetadata(
        game_id=game_id,
        title=title or site.find("h1", class_="game_title").text.strip(),
        url=url,
        cover_url=self.get_meta(site, property="og:image"),
        screenshots=screenshot_urls,
        description=description,
    )

    # Work in progress: the infobox is parsed, but its contents are not yet
    # merged into the returned metadata.
    infobox_div = site.find("div", class_="game_info_panel_widget")
    if infobox_div:
        infobox = parse_infobox(infobox_div)

    # Metadata keys that are still to be populated.
    TODO_KEYS = ['author', 'author_url', 'created_at', 'released_at', 'published_at']

    return metadata
|
||||
|
||||
@ -137,17 +153,17 @@ class GameDownloader:
|
||||
|
||||
return credentials
|
||||
|
||||
def download_file(self, upload_id: int, download_path: Optional[str], creds: dict) -> str:
|
||||
"""Performs a request to download a given upload by its ID, optionally saves the
|
||||
def download_file(self, url: str, download_path: Optional[str], credentials: dict) -> str:
|
||||
"""Performs a request to download a given file, optionally saves the
|
||||
file to the provided path and returns the final URL that was downloaded."""
|
||||
try:
|
||||
# No timeouts, chunked uploads, default retry strategy, should be all good?
|
||||
with self.client.get(f"/uploads/{upload_id}/download", data=creds, stream=True) as r:
|
||||
with self.client.get(url, data=credentials, stream=True) as r:
|
||||
r.raise_for_status()
|
||||
|
||||
if download_path is not None: # ...and it will be for external downloads.
|
||||
with tqdm.wrapattr(open(download_path, "wb"), "write",
|
||||
miniters=1, desc=str(upload_id),
|
||||
miniters=1, desc=url,
|
||||
total=int(r.headers.get('content-length', 0))) as f:
|
||||
for chunk in r.iter_content(chunk_size=1048576): # 1MB chunks
|
||||
f.write(chunk)
|
||||
@ -156,6 +172,10 @@ class GameDownloader:
|
||||
except HTTPError as e:
|
||||
raise ItchDownloadError(f"Unrecoverable download error: {e}")
|
||||
|
||||
def download_file_by_upload_id(self, upload_id: int, download_path: Optional[str], credentials: dict) -> str:
    """Download an upload identified by its ID.

    Builds the "/uploads/<upload_id>/download" path and hands it, together
    with the untouched download_path and credentials, to download_file(),
    returning whatever that call returns.
    """
    endpoint = f"/uploads/{upload_id}/download"
    return self.download_file(endpoint, download_path, credentials)
|
||||
|
||||
def download(self, url: str, skip_downloaded: bool = True):
|
||||
match = re.match(ITCH_GAME_URL_REGEX, url)
|
||||
if not match:
|
||||
@ -218,7 +238,7 @@ class GameDownloader:
|
||||
target_path = None if upload_is_external else os.path.join(paths['files'], file_name)
|
||||
|
||||
try:
|
||||
target_url = self.download_file(upload_id, target_path, credentials)
|
||||
target_url = self.download_file_by_upload_id(upload_id, target_path, credentials)
|
||||
except ItchDownloadError as e:
|
||||
errors.append(f"Download failed for upload {upload}: {e}")
|
||||
continue
|
||||
@ -245,6 +265,22 @@ class GameDownloader:
|
||||
logging.warning(f"Game {title} has external download URLs: {external_urls}")
|
||||
|
||||
# TODO: Screenshots and site assets
|
||||
if self.mirror_web:
|
||||
os.makedirs(paths['screenshots'], exist_ok=True)
|
||||
for screenshot in metadata['screenshots']:
|
||||
file_name = os.path.basename(screenshot)
|
||||
try:
|
||||
self.download_file(screenshot, os.path.join(paths['screenshots'], file_name), credentials={})
|
||||
except Exception as e:
|
||||
errors.append(f"Screenshot download failed (this is not fatal): {e}")
|
||||
|
||||
if 'cover_url' in metadata:
|
||||
try:
|
||||
cover_url = metadata['cover_url']
|
||||
self.download_file(cover_url, paths['cover'] + os.path.splitext(cover_url)[-1], credentials={})
|
||||
except Exception as e:
|
||||
errors.append(f"Cover art download failed (this is not fatal): {e}")
|
||||
|
||||
with open(paths['site'], 'w') as f:
|
||||
f.write(site.prettify())
|
||||
|
||||
@ -255,11 +291,11 @@ class GameDownloader:
|
||||
logging.error(f"Game {title} has download errors: {errors}")
|
||||
|
||||
logging.info("Finished job %s (%s)", url, title)
|
||||
return DownloadResult(url, True, errors, external_urls)
|
||||
return DownloadResult(url, len(errors) == 0, errors, external_urls)
|
||||
|
||||
|
||||
def drive_downloads(jobs: List[str], download_to: str, api_key: str, keys: Dict[int, str], parallel: int = 1):
|
||||
downloader = GameDownloader(download_to, api_key, keys)
|
||||
def drive_downloads(jobs: List[str], download_to: str, mirror_web: bool, api_key: str, keys: Dict[int, str], parallel: int = 1):
|
||||
downloader = GameDownloader(download_to, mirror_web, api_key, keys)
|
||||
tqdm_args = {
|
||||
"desc": "Games",
|
||||
"unit": "game",
|
||||
|
47
itch_dl/infobox.py
Normal file
47
itch_dl/infobox.py
Normal file
@ -0,0 +1,47 @@
|
||||
from datetime import datetime
|
||||
from typing import TypedDict, Dict, List, Any, Tuple, Optional
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class InfoboxMetadata(TypedDict, total=False):
    """Typed dict holding the metadata blocks parsed out of a game infobox.

    No keys are declared yet; with total=False any keys declared later
    will be optional.
    """
|
||||
|
||||
|
||||
def parse_date_block(td: BeautifulSoup) -> datetime:
    """Placeholder for date-cell parsing.

    Not implemented yet: every call raises NotImplementedError.
    """
    raise NotImplementedError("Not yet!")
|
||||
|
||||
|
||||
def parse_links(td: BeautifulSoup) -> Dict[str, str]:
    """Parses blocks of comma-separated <a> blocks, returns a dict
    of link text -> URL it points at.

    Anchors without an href attribute are skipped. When two anchors share
    the same text, the later one wins. Always returns a dict (possibly
    empty), so callers may safely use .keys() on the result.
    """
    links: Dict[str, str] = {}
    for anchor in td.find_all("a"):
        href = anchor.get("href")
        if href:
            links[anchor.text.strip()] = href

    return links
|
||||
|
||||
|
||||
def parse_text_from_links(td: BeautifulSoup) -> List[str]:
    """Return only the keys of the mapping that parse_links() yields for td."""
    links = parse_links(td)
    return [*links.keys()]
|
||||
|
||||
|
||||
def parse_tr(name: str, content: BeautifulSoup) -> Optional[Tuple[str, Any]]:
    """Turn one infobox row into a (key, value) pair, or None.

    Currently a stub: the "Updated" row is recognized but not handled, so
    every call returns None.
    """
    if name == "Updated":
        # Recognized row, no handling yet.
        return None

    return None
|
||||
|
||||
|
||||
def parse_infobox(infobox: BeautifulSoup) -> dict:
    """Parse a <div class="game_info_panel_widget"> node into a dict of
    metadata blocks.

    Each <tr> with at least two <td> cells is treated as a (name, content)
    row: the stripped text of the first cell and the second cell itself are
    passed to parse_tr(). A truthy result is stored under its first element
    with its second element as the value.
    """
    meta = InfoboxMetadata()

    for row in infobox.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2:
            label = cells[0].text.strip()
            parsed = parse_tr(label, cells[1])
            if parsed:
                meta[parsed[0]] = parsed[1]

    return meta
|
Loading…
Reference in New Issue
Block a user