mirror of
https://github.com/CosmicScale/PSBBN-Definitive-English-Patch.git
synced 2025-10-15 20:09:39 +02:00

- Convert art_downloader from JavaScript to Python - Remove nodejs, npm, puppeteer and chromium dependencies
129 lines
3.8 KiB
Python
Executable File
129 lines
3.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import sys
|
|
import csv
|
|
import urllib.request
|
|
from urllib.parse import urlparse
|
|
from pathlib import Path
|
|
from html.parser import HTMLParser
|
|
|
|
# Pipe-delimited lookup table read by find_url_for_game_id(): column 0 is the
# game ID and column 1 is the slug appended to https://www.ign.com/games/.
# The path is relative, so it is resolved against the current working directory.
CSV_FILE_PATH = './scripts/helper/ArtDB.csv'
# Directory where main() saves the downloaded image as <game_id><extension>;
# main() creates it (including parents) if it does not exist yet.
OUTPUT_DIR = './icons/art/tmp'

# Exact image URLs that ImgParser must never pick. They are compared against
# the <img> src after its query string has been stripped.
# NOTE(review): presumably site chrome / avatars rather than game art - confirm.
IGNORED_IMAGES = [
    "https://assets-prd.ignimgs.com/2025/04/03/switch2-doodle-1743697401557.png",
    "https://assets-prd.ignimgs.com/avatars/4ec71177e4b0ca04b5aab1c8/Nix_-_IGN_-_202x-1691124964030.png",
    "https://media.ign.com/boards/images/icons2/supers_ghostrider.gif"
]

# URL prefixes an <img> src must start with to be accepted. main() tries them
# in list order and stops at the first prefix that yields an image.
SEARCH_DOMAINS = [
    "https://assets-prd.ignimgs.com",
    "https://media.ign.com",
    "https://ps2media.ign.com",
    "https://ps3media.ign.com",
    "https://media.gamestats.com",
    "https://assets1.ignimgs.com",
]
|
|
|
|
|
|
def find_url_for_game_id(game_id: str) -> str | None:
|
|
"""Search CSV for the given game ID and return full IGN URL."""
|
|
try:
|
|
with open(CSV_FILE_PATH, newline="", encoding="utf-8") as csvfile:
|
|
reader = csv.reader(csvfile, delimiter="|")
|
|
for row in reader:
|
|
if len(row) >= 2 and row[0] == game_id:
|
|
return f"https://www.ign.com/games/{row[1]}"
|
|
except FileNotFoundError:
|
|
print(f"CSV file not found: {CSV_FILE_PATH}")
|
|
sys.exit(1)
|
|
return None
|
|
|
|
|
|
class ImgParser(HTMLParser):
    """HTML parser that records the first acceptable ``<img>`` for a domain.

    An image is acceptable when its ``src`` starts with the configured
    domain prefix (ending on a host boundary) and, once its query string is
    removed, is not one of the ignored URLs. The result is stored in
    ``found_img`` (``None`` until a match is seen).
    """

    def __init__(self, domain, ignored_images=None):
        """Create a parser for one domain prefix.

        Args:
            domain: URL prefix such as ``"https://media.ign.com"``.
            ignored_images: Optional collection of exact image URLs to
                skip. When omitted, the module-level ``IGNORED_IMAGES``
                list is used.
        """
        super().__init__()
        self.domain = domain
        self.ignored_images = frozenset(
            IGNORED_IMAGES if ignored_images is None else ignored_images
        )
        self.found_img = None

    def _matches_domain(self, src):
        """Return True when *src* starts with the domain on a host boundary."""
        prefix = self.domain
        if not src.startswith(prefix):
            return False
        if prefix.endswith("/"):
            return True
        # Without this check "https://media.ign.com" would also accept
        # look-alike hosts such as "https://media.ign.com.evil.example/".
        remainder = src[len(prefix):]
        return not remainder or remainder[0] in "/?#:"

    def handle_starttag(self, tag, attrs):
        """Inspect each start tag and keep the first matching image URL."""
        if tag != "img" or self.found_img:
            return
        src = dict(attrs).get("src")
        if not src or not self._matches_domain(src):
            return
        # Drop sizing/cropping query parameters so the ignore list can be
        # matched against the bare URL.
        clean_src = src.split("?")[0]
        if clean_src not in self.ignored_images:
            self.found_img = clean_src
|
|
|
|
|
|
def fetch_page(url: str, timeout: float = 30.0) -> str:
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The body decoded with the charset declared in the response headers,
        falling back to UTF-8 when none (or an unknown one) is declared.
        Undecodable bytes are dropped.

    Raises:
        urllib.error.URLError: When the request fails or times out.
    """
    req = urllib.request.Request(
        url,
        # A browser-like User-Agent is sent with every request.
        headers={"User-Agent": "Mozilla/5.0"},
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = resp.read()
        charset = resp.headers.get_content_charset() or "utf-8"
    try:
        return body.decode(charset, errors="ignore")
    except LookupError:
        # The server announced a codec Python does not know.
        return body.decode("utf-8", errors="ignore")
|
|
|
|
|
|
def download_image(url: str, destination: Path, timeout: float = 30.0):
    """Download *url* and store it at *destination*.

    The data is streamed into a ``<name>.part`` sibling file and renamed
    onto *destination* only after the transfer completed, so a failed or
    interrupted download never leaves a truncated image behind and never
    clobbers an existing file.

    Args:
        url: Address of the image to download.
        destination: Path of the file to create or overwrite.
        timeout: Seconds to wait on the connection before giving up.

    Raises:
        urllib.error.URLError: When the request fails or times out.
        OSError: When the file cannot be written.
    """
    req = urllib.request.Request(
        url,
        headers={"User-Agent": "Mozilla/5.0"},
    )
    target = Path(destination)
    partial = target.with_name(target.name + ".part")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp, open(partial, "wb") as out:
            # Copy in bounded chunks instead of holding the whole body in memory.
            while chunk := resp.read(64 * 1024):
                out.write(chunk)
        partial.replace(target)
    except BaseException:
        # Remove the incomplete temp file, then let the caller see the error.
        partial.unlink(missing_ok=True)
        raise
|
|
|
|
|
|
def main():
    """Download cover art for the game ID passed on the command line.

    The game ID is resolved to an IGN page via find_url_for_game_id(), the
    page is fetched, and each prefix in SEARCH_DOMAINS is tried in order
    until ImgParser finds an image. That image is saved in OUTPUT_DIR as
    ``<game_id><extension>``. The process exits with status 0 on success
    and 1 on any failure (missing argument, unknown ID, fetch error, no
    image, download error).
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python art_downloader.py <gameid>")
        sys.exit(1)

    game_id = argv[1]
    out_dir = Path(OUTPUT_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)

    page_url = find_url_for_game_id(game_id)
    if not page_url:
        print(f'Game ID "{game_id}" not found in ArtDB.csv')
        sys.exit(1)

    print(f"Navigating to: {page_url}")
    try:
        markup = fetch_page(page_url)
    except Exception as err:
        print(f"Failed to fetch the page: {err}")
        sys.exit(1)

    # Try every domain prefix in priority order; the first hit wins.
    image_url = None
    last_index = len(SEARCH_DOMAINS) - 1
    for index, domain in enumerate(SEARCH_DOMAINS):
        scanner = ImgParser(domain)
        scanner.feed(markup)
        if scanner.found_img:
            image_url = scanner.found_img
            break
        if index < last_index:
            # Announce the fallback only when there is another domain left.
            current = domain.replace('https://', '')
            upcoming = SEARCH_DOMAINS[index + 1].replace('https://', '')
            print(f"No image found on {current}. Checking {upcoming}...")

    if not image_url:
        print("No image found on any source.")
        sys.exit(1)

    # Keep the remote file's extension for the local file name.
    suffix = Path(urlparse(image_url).path).suffix
    target = out_dir / f"{game_id}{suffix}"

    print(f"Downloading image from: {image_url}")
    try:
        download_image(image_url, target)
        print(f"Saved as: {target}")
    except Exception as err:
        print(f"Error downloading image: {err}")
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()
|