From e61ef0cba35db722e699ffd1142d1c386637097c Mon Sep 17 00:00:00 2001 From: Ryszard Knop Date: Sun, 29 Jan 2023 15:22:24 +0100 Subject: [PATCH] Force use UTF-8 end-to-end for site downloading Itch always uses UTF-8. This change prevents garbled output with non-Latin scripts if requests or BS4 does not guess that correctly. --- itch_dl/api.py | 23 +++++++++++++++++++++-- itch_dl/downloader.py | 6 +++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/itch_dl/api.py b/itch_dl/api.py index f416a63..f59cc2c 100644 --- a/itch_dl/api.py +++ b/itch_dl/api.py @@ -28,7 +28,19 @@ class ItchApiClient: self.requests.mount("https://", adapter) self.requests.mount("http://", adapter) - def get(self, endpoint: str, append_api_key: bool = True, **kwargs) -> requests.Response: + def get( + self, + endpoint: str, + append_api_key: bool = True, + guess_encoding: bool = False, + **kwargs + ) -> requests.Response: + """Wrapper around `requests.get`. + + :param endpoint: Path to fetch on the specified base URL. + :param append_api_key: Send an authenticated API request. + :param guess_encoding: Let requests guess the response encoding. + """ if append_api_key: params = kwargs.get('data') or {} @@ -42,4 +54,11 @@ class ItchApiClient: else: url = self.base_url + endpoint - return self.requests.get(url, **kwargs) + r = self.requests.get(url, **kwargs) + + # Itch always returns UTF-8 pages and API responses. Force + # UTF-8 everywhere, except for binary file downloads. + if not guess_encoding: + r.encoding = 'utf-8' + + return r diff --git a/itch_dl/downloader.py b/itch_dl/downloader.py index 3923852..b2e9f5f 100644 --- a/itch_dl/downloader.py +++ b/itch_dl/downloader.py @@ -191,7 +191,7 @@ class GameDownloader: file to the provided path and returns the final URL that was downloaded.""" try: # No timeouts, chunked uploads, default retry strategy, should be all good? - with self.client.get(url, data=credentials, stream=True) as r: + with self.client.get(url, data=credentials, stream=True, guess_encoding=True) as r: r.raise_for_status() if download_path is not None: # ...and it will be for external downloads. @@ -320,8 +320,8 @@ class GameDownloader: except Exception as e: errors.append(f"Cover art download failed (this is not fatal): {e}") - with open(paths['site'], 'w') as f: - f.write(site.prettify()) + with open(paths['site'], 'wb') as f: + f.write(site.prettify(encoding='utf-8')) with open(paths['metadata'], 'w') as f: json.dump(metadata, f, indent=4)