forked from Mirrors/itch-dl
Implement infobox parsing, misc bugfixes, version bump
This commit is contained in:
@@ -5,17 +5,43 @@ from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class InfoboxMetadata(TypedDict, total=False):
|
||||
pass
|
||||
updated_at: datetime
|
||||
released_at: datetime
|
||||
published_at: datetime
|
||||
status: str
|
||||
platforms: List[str] # Windows/macOS/Linux/etc
|
||||
publisher: str
|
||||
author: Dict[str, str] # See impl below!
|
||||
authors: Dict[str, str] # Links
|
||||
genre: Dict[str, str] # Links
|
||||
tools: Dict[str, str] # Links
|
||||
license: Dict[str, str] # Links
|
||||
asset_license: Dict[str, str] # Links
|
||||
tags: Dict[str, str] # Links
|
||||
length: str
|
||||
multiplayer: Dict[str, str] # Links
|
||||
player_count: str
|
||||
accessibility: Dict[str, str] # Links
|
||||
inputs: Dict[str, str] # Links
|
||||
links: Dict[str, str] # Links
|
||||
mentions: Dict[str, str] # Links
|
||||
|
||||
|
||||
def parse_date_block(td: BeautifulSoup) -> datetime:
|
||||
raise NotImplementedError("Not yet!")
|
||||
def parse_date_block(td: BeautifulSoup) -> Optional[datetime]:
|
||||
abbr = td.find("abbr")
|
||||
if not abbr or 'title' not in abbr.attrs:
|
||||
return None
|
||||
|
||||
date_str, time_str = abbr['title'].split('@')
|
||||
date = datetime.strptime(date_str.strip(), "%d %B %Y")
|
||||
time = datetime.strptime(time_str.strip(), "%H:%M")
|
||||
return datetime(date.year, date.month, date.day, time.hour, time.minute)
|
||||
|
||||
|
||||
def parse_links(td: BeautifulSoup) -> Dict[str, str]:
|
||||
"""Parses blocks of comma-separated <a> blocks, returns a dict
|
||||
of link text -> URL it points at."""
|
||||
pass
|
||||
return {link.text.strip(): link['href'] for link in td.find_all("a")}
|
||||
|
||||
|
||||
def parse_text_from_links(td: BeautifulSoup) -> List[str]:
|
||||
@@ -24,7 +50,54 @@ def parse_text_from_links(td: BeautifulSoup) -> List[str]:
|
||||
|
||||
def parse_tr(name: str, content: BeautifulSoup) -> Optional[Tuple[str, Any]]:
|
||||
if name == "Updated":
|
||||
pass
|
||||
return "updated_at", parse_date_block(content)
|
||||
elif name == "Release date":
|
||||
return "released_at", parse_date_block(content)
|
||||
elif name == "Published":
|
||||
return "published_at", parse_date_block(content)
|
||||
elif name == "Status":
|
||||
return "status", parse_text_from_links(content)[0]
|
||||
elif name == "Platforms":
|
||||
return "platforms", parse_text_from_links(content)
|
||||
elif name == "Publisher":
|
||||
return "publisher", content.text.strip()
|
||||
elif name == "Rating":
|
||||
return None # Read the AggregatedRating block instead!
|
||||
elif name == "Author":
|
||||
author, author_url = parse_links(content).popitem()
|
||||
return "author", {"author": author, "author_url": author_url}
|
||||
elif name == "Authors":
|
||||
return "authors", parse_links(content)
|
||||
elif name == "Genre":
|
||||
return "genre", parse_links(content)
|
||||
elif name == "Made with":
|
||||
return "tools", parse_links(content)
|
||||
elif name == "License":
|
||||
return "license", parse_links(content)
|
||||
elif name == "Asset license":
|
||||
return "asset_license", parse_links(content)
|
||||
elif name == "Tags":
|
||||
return "tags", parse_links(content)
|
||||
elif name == "Average session":
|
||||
return "length", parse_text_from_links(content)[0]
|
||||
elif name == "Languages":
|
||||
return "languages", parse_links(content)
|
||||
elif name == "Multiplayer":
|
||||
return "multiplayer", parse_links(content)
|
||||
elif name == "Player count":
|
||||
return "player_count", content.text.strip()
|
||||
elif name == "Accessibility":
|
||||
return "accessibility", parse_links(content)
|
||||
elif name == "Inputs":
|
||||
return "inputs", parse_links(content)
|
||||
elif name == "Links":
|
||||
return "links", parse_links(content)
|
||||
elif name == "Mentions":
|
||||
return "mentions", parse_links(content)
|
||||
else:
|
||||
# Oops, you need to extend this with something new. Sorry.
|
||||
# Make sure to add the block name to InfoboxMetadata as well!
|
||||
raise NotImplementedError(f"Unknown infobox block name '{name}' - please file a new itch-dl issue.")
|
||||
|
||||
|
||||
def parse_infobox(infobox: BeautifulSoup) -> dict:
|
||||
@@ -42,6 +115,7 @@ def parse_infobox(infobox: BeautifulSoup) -> dict:
|
||||
|
||||
parsed_block = parse_tr(name, content_td)
|
||||
if parsed_block:
|
||||
# noinspection PyTypedDict
|
||||
meta[parsed_block[0]] = parsed_block[1]
|
||||
|
||||
return meta
|
||||
|
||||
Reference in New Issue
Block a user