1
0
forked from Mirrors/itch-dl

Implement downloading entire creator pages

This commit is contained in:
Ryszard Knop
2024-12-19 14:02:30 +01:00
parent 423fa6b389
commit c11774475d

View File

@@ -4,6 +4,7 @@ import logging
import urllib.parse import urllib.parse
from typing import List, Set, Optional from typing import List, Set, Optional
from http.client import responses
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from .api import ItchApiClient from .api import ItchApiClient
@@ -117,6 +118,27 @@ def get_jobs_for_collection_json(url: str, client: ItchApiClient) -> List[str]:
return list(found_urls) return list(found_urls)
def get_jobs_for_creator(creator: str, client: ItchApiClient) -> List[str]:
    """Collect the URLs of the public games listed on a creator's profile page.

    Requests ``https://{ITCH_BASE}/profile/{creator}`` (without appending the
    API key) and gathers the ``href`` of every ``a.game_link`` element that
    points into the creator's own ``{creator}.{ITCH_BASE}`` subdomain.

    Args:
        creator: Creator username, used both in the profile URL and as the
            subdomain prefix that game links must start with.
        client: API client used to perform the HTTP request.

    Returns:
        A sorted list of unique game URLs found on the profile page.

    Raises:
        ItchDownloadError: If the profile page response is not OK.
    """
    logging.info("Downloading public games for creator %s", creator)

    r = client.get(f"https://{ITCH_BASE}/profile/{creator}", append_api_key=False)
    if not r.ok:
        # http.client.responses only maps registered status codes; look the
        # reason up defensively so an unlisted code still surfaces as an
        # ItchDownloadError instead of an unrelated KeyError.
        reason = responses.get(r.status_code, "Unknown Status")
        raise ItchDownloadError(f"Could not fetch the creator page: HTTP {r.status_code} {reason}")

    # Only links under the creator's own subdomain count as their games.
    prefix = f"https://{creator}.{ITCH_BASE}/"
    game_links = set()

    soup = BeautifulSoup(r.text, features="xml")
    for link in soup.select("a.game_link"):
        link_url = link.attrs.get('href')
        # Skip anchors without an href as well as links outside the subdomain.
        if link_url and link_url.startswith(prefix):
            game_links.add(link_url)

    return sorted(game_links)
def get_jobs_for_itch_url(url: str, client: ItchApiClient) -> List[str]: def get_jobs_for_itch_url(url: str, client: ItchApiClient) -> List[str]:
if url.startswith("http://"): if url.startswith("http://"):
logging.info("HTTP link provided, upgrading to HTTPS") logging.info("HTTP link provided, upgrading to HTTPS")
@@ -160,9 +182,7 @@ def get_jobs_for_itch_url(url: str, client: ItchApiClient) -> List[str]:
elif site == "profile": # Forum Profile elif site == "profile": # Forum Profile
if len(url_path_parts) >= 2: if len(url_path_parts) >= 2:
username = url_path_parts[1] return get_jobs_for_creator(url_path_parts[1], client)
logging.info("Correcting user profile to creator page for %s", username)
return get_jobs_for_itch_url(f"https://{username}.{ITCH_BASE}", client)
raise ValueError("itch-dl expects a username in profile links.") raise ValueError("itch-dl expects a username in profile links.")
@@ -179,8 +199,7 @@ def get_jobs_for_itch_url(url: str, client: ItchApiClient) -> List[str]:
elif url_parts.netloc.endswith(f".{ITCH_BASE}"): elif url_parts.netloc.endswith(f".{ITCH_BASE}"):
if len(url_path_parts) == 0: # Author if len(url_path_parts) == 0: # Author
# TODO: Find I.UserPage, regex for "user_id": [0-9]+, find the responsible API? return get_jobs_for_creator(url_parts.netloc.split('.')[0], client)
raise NotImplementedError("itch-dl cannot download author pages yet.")
else: # Single game else: # Single game
# Just clean and return the URL: # Just clean and return the URL: