Force the use of UTF-8 end-to-end for site downloading

Itch always uses UTF-8. This change prevents garbled output with
non-Latin scripts when requests or BS4 fails to guess the encoding correctly.
This commit is contained in:
Ryszard Knop 2023-01-29 15:22:24 +01:00
parent fb2e4c6736
commit e61ef0cba3
2 changed files with 24 additions and 5 deletions

View File

@ -28,7 +28,19 @@ class ItchApiClient:
self.requests.mount("https://", adapter)
self.requests.mount("http://", adapter)
def get(self, endpoint: str, append_api_key: bool = True, **kwargs) -> requests.Response:
def get(
self,
endpoint: str,
append_api_key: bool = True,
guess_encoding: bool = False,
**kwargs
) -> requests.Response:
"""Wrapper around `requests.get`.
:param endpoint: Path to fetch on the specified base URL.
:param append_api_key: Send an authenticated API request.
:param guess_encoding: Let requests guess the response encoding.
"""
if append_api_key:
params = kwargs.get('data') or {}
@ -42,4 +54,11 @@ class ItchApiClient:
else:
url = self.base_url + endpoint
return self.requests.get(url, **kwargs)
r = self.requests.get(url, **kwargs)
# Itch always returns UTF-8 pages and API responses. Force
# UTF-8 everywhere, except for binary file downloads.
if not guess_encoding:
r.encoding = 'utf-8'
return r

View File

@ -191,7 +191,7 @@ class GameDownloader:
file to the provided path and returns the final URL that was downloaded."""
try:
# No timeouts, chunked uploads, default retry strategy, should be all good?
with self.client.get(url, data=credentials, stream=True) as r:
with self.client.get(url, data=credentials, stream=True, guess_encoding=True) as r:
r.raise_for_status()
if download_path is not None: # ...and it will be for external downloads.
@ -320,8 +320,8 @@ class GameDownloader:
except Exception as e:
errors.append(f"Cover art download failed (this is not fatal): {e}")
with open(paths['site'], 'w') as f:
f.write(site.prettify())
with open(paths['site'], 'wb') as f:
f.write(site.prettify(encoding='utf-8'))
with open(paths['metadata'], 'w') as f:
json.dump(metadata, f, indent=4)