Files
CosmicScale 86d4a949b0 Convert art_downloader from JavaScript to Python and remove Node.js dependencies
- Convert art_downloader from JavaScript to Python
- Remove nodejs, npm, puppeteer and chromium dependencies
2025-10-03 19:45:13 +01:00

129 lines
3.8 KiB
Python
Executable File

#!/usr/bin/env python3
import sys
import csv
import urllib.request
from urllib.parse import urlparse
from pathlib import Path
from html.parser import HTMLParser
CSV_FILE_PATH = './scripts/helper/ArtDB.csv'
OUTPUT_DIR = './icons/art/tmp'
IGNORED_IMAGES = [
"https://assets-prd.ignimgs.com/2025/04/03/switch2-doodle-1743697401557.png",
"https://assets-prd.ignimgs.com/avatars/4ec71177e4b0ca04b5aab1c8/Nix_-_IGN_-_202x-1691124964030.png",
"https://media.ign.com/boards/images/icons2/supers_ghostrider.gif"
]
SEARCH_DOMAINS = [
"https://assets-prd.ignimgs.com",
"https://media.ign.com",
"https://ps2media.ign.com",
"https://ps3media.ign.com",
"https://media.gamestats.com",
"https://assets1.ignimgs.com",
]
def find_url_for_game_id(game_id: str) -> str | None:
"""Search CSV for the given game ID and return full IGN URL."""
try:
with open(CSV_FILE_PATH, newline="", encoding="utf-8") as csvfile:
reader = csv.reader(csvfile, delimiter="|")
for row in reader:
if len(row) >= 2 and row[0] == game_id:
return f"https://www.ign.com/games/{row[1]}"
except FileNotFoundError:
print(f"CSV file not found: {CSV_FILE_PATH}")
sys.exit(1)
return None
class ImgParser(HTMLParser):
"""HTML parser that finds the first image for a given domain."""
def __init__(self, domain):
super().__init__()
self.domain = domain
self.found_img = None
def handle_starttag(self, tag, attrs):
if tag == "img" and not self.found_img:
attrs = dict(attrs)
src = attrs.get("src")
if src and src.startswith(self.domain):
clean_src = src.split("?")[0]
if clean_src not in IGNORED_IMAGES:
self.found_img = clean_src
def fetch_page(url: str) -> str:
req = urllib.request.Request(
url,
headers={"User-Agent": "Mozilla/5.0"}
)
with urllib.request.urlopen(req) as resp:
return resp.read().decode("utf-8", errors="ignore")
def download_image(url: str, destination: Path):
req = urllib.request.Request(
url,
headers={"User-Agent": "Mozilla/5.0"}
)
with urllib.request.urlopen(req) as resp, open(destination, "wb") as f:
f.write(resp.read())
def main():
if len(sys.argv) < 2:
print("Usage: python art_downloader.py <gameid>")
sys.exit(1)
game_id = sys.argv[1]
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
url = find_url_for_game_id(game_id)
if not url:
print(f'Game ID "{game_id}" not found in ArtDB.csv')
sys.exit(1)
print(f"Navigating to: {url}")
try:
html = fetch_page(url)
except Exception as e:
print(f"Failed to fetch the page: {e}")
sys.exit(1)
img_url = None
for i, domain in enumerate(SEARCH_DOMAINS):
parser = ImgParser(domain)
parser.feed(html)
if parser.found_img:
img_url = parser.found_img
break
else:
if i < len(SEARCH_DOMAINS) - 1:
next_domain = SEARCH_DOMAINS[i + 1]
print(f"No image found on {domain.replace('https://', '')}. Checking {next_domain.replace('https://', '')}...")
if img_url:
file_extension = Path(urlparse(img_url).path).suffix
file_name = Path(OUTPUT_DIR) / f"{game_id}{file_extension}"
print(f"Downloading image from: {img_url}")
try:
download_image(img_url, file_name)
print(f"Saved as: {file_name}")
sys.exit(0)
except Exception as e:
print(f"Error downloading image: {e}")
sys.exit(1)
else:
print("No image found on any source.")
sys.exit(1)
if __name__ == "__main__":
main()