#!/usr/bin/env python3 import sys import csv import urllib.request from urllib.parse import urlparse from pathlib import Path from html.parser import HTMLParser CSV_FILE_PATH = './scripts/helper/ArtDB.csv' OUTPUT_DIR = './icons/art/tmp' IGNORED_IMAGES = [ "https://assets-prd.ignimgs.com/2025/04/03/switch2-doodle-1743697401557.png", "https://assets-prd.ignimgs.com/avatars/4ec71177e4b0ca04b5aab1c8/Nix_-_IGN_-_202x-1691124964030.png", "https://media.ign.com/boards/images/icons2/supers_ghostrider.gif" ] SEARCH_DOMAINS = [ "https://assets-prd.ignimgs.com", "https://media.ign.com", "https://ps2media.ign.com", "https://ps3media.ign.com", "https://media.gamestats.com", "https://assets1.ignimgs.com", ] def find_url_for_game_id(game_id: str) -> str | None: """Search CSV for the given game ID and return full IGN URL.""" try: with open(CSV_FILE_PATH, newline="", encoding="utf-8") as csvfile: reader = csv.reader(csvfile, delimiter="|") for row in reader: if len(row) >= 2 and row[0] == game_id: return f"https://www.ign.com/games/{row[1]}" except FileNotFoundError: print(f"CSV file not found: {CSV_FILE_PATH}") sys.exit(1) return None class ImgParser(HTMLParser): """HTML parser that finds the first image for a given domain.""" def __init__(self, domain): super().__init__() self.domain = domain self.found_img = None def handle_starttag(self, tag, attrs): if tag == "img" and not self.found_img: attrs = dict(attrs) src = attrs.get("src") if src and src.startswith(self.domain): clean_src = src.split("?")[0] if clean_src not in IGNORED_IMAGES: self.found_img = clean_src def fetch_page(url: str) -> str: req = urllib.request.Request( url, headers={"User-Agent": "Mozilla/5.0"} ) with urllib.request.urlopen(req) as resp: return resp.read().decode("utf-8", errors="ignore") def download_image(url: str, destination: Path): req = urllib.request.Request( url, headers={"User-Agent": "Mozilla/5.0"} ) with urllib.request.urlopen(req) as resp, open(destination, "wb") as f: f.write(resp.read()) def main(): if len(sys.argv) < 2: print("Usage: python art_downloader.py ") sys.exit(1) game_id = sys.argv[1] Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True) url = find_url_for_game_id(game_id) if not url: print(f'Game ID "{game_id}" not found in ArtDB.csv') sys.exit(1) print(f"Navigating to: {url}") try: html = fetch_page(url) except Exception as e: print(f"Failed to fetch the page: {e}") sys.exit(1) img_url = None for i, domain in enumerate(SEARCH_DOMAINS): parser = ImgParser(domain) parser.feed(html) if parser.found_img: img_url = parser.found_img break else: if i < len(SEARCH_DOMAINS) - 1: next_domain = SEARCH_DOMAINS[i + 1] print(f"No image found on {domain.replace('https://', '')}. Checking {next_domain.replace('https://', '')}...") if img_url: file_extension = Path(urlparse(img_url).path).suffix file_name = Path(OUTPUT_DIR) / f"{game_id}{file_extension}" print(f"Downloading image from: {img_url}") try: download_image(img_url, file_name) print(f"Saved as: {file_name}") sys.exit(0) except Exception as e: print(f"Error downloading image: {e}") sys.exit(1) else: print("No image found on any source.") sys.exit(1) if __name__ == "__main__": main()