From 06f75d4996410a850089b3b5cedcda57bc594a90 Mon Sep 17 00:00:00 2001
From: Ryszard Knop
Date: Fri, 8 Nov 2024 21:54:55 +0100
Subject: [PATCH] Guess the decompressed file size for downloaded Zip/Tar archives

For some archives, the Itch API returns the expected decompressed file
size, but serves compressed archives with its content. Iterate over Zip
and Tar archives to get the expected decompressed file size. In case
there's anything wrong with the archives, just quietly ignore these.

Fixes #21
---
Note: the hunks below were edited by hand after export (logging.debug
instead of print, Optional[int] return annotation, archive probing wrapped
in a single try that also ignores OSError/EOFError). The post-image blob id
on the "index" line predates those edits.

 itch_dl/downloader.py | 63 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 5 deletions(-)

diff --git a/itch_dl/downloader.py b/itch_dl/downloader.py
index beeff24..3ab11b2 100644
--- a/itch_dl/downloader.py
+++ b/itch_dl/downloader.py
@@ -3,6 +3,8 @@ import json
 import re
 import logging
 import urllib.parse
+import zipfile
+import tarfile
 from typing import List, Dict, TypedDict, Optional, Union
 
 from bs4 import BeautifulSoup
@@ -219,6 +221,39 @@ class GameDownloader:
         """Performs a request to download a given upload by its ID."""
         return self.download_file(f"/uploads/{upload_id}/download", download_path, credentials)
 
+    @staticmethod
+    def get_decompressed_content_size(target_path) -> Optional[int]:
+        """Return the total unpacked size of a downloaded Zip/Tar archive.
+
+        For some files, Itch API returns the decompressed file size, but serves
+        compressed downloads. Try to figure out the decompressed size. It may be
+        a single file in the root, or a container + files in it.
+
+        Returns the summed size of all regular files in the archive, or None if
+        the file is not a Zip/Tar archive, has no regular files, or is damaged.
+        """
+        try:
+            if zipfile.is_zipfile(target_path):
+                with zipfile.ZipFile(target_path) as f:
+                    # Zip files contain either directories or files. The file format
+                    # is compression-aware, compress_size is packed, file_size is unpacked.
+                    sizes = [i.file_size for i in f.infolist() if not i.is_dir()]
+                    return sum(sizes) if sizes else None
+
+            if tarfile.is_tarfile(target_path):
+                with tarfile.open(target_path) as f:
+                    # Tar files can contain any Unix "file", so regular files,
+                    # directories, symlinks, devices and FIFOs are fair game...
+                    # On the other hand, TAR is not compression-aware.
+                    sizes = [i.size for i in f.getmembers() if i.isfile()]
+                    return sum(sizes) if sizes else None
+        except (zipfile.BadZipFile, tarfile.TarError, OSError, EOFError):
+            # A damaged, truncated or unreadable archive is not an error here:
+            # the caller simply gets no content size to compare against.
+            return None
+
+        return None
+
     def download(self, url: str, skip_downloaded: bool = True):
         match = re.match(ITCH_GAME_URL_REGEX, url)
         if not match:
@@ -274,14 +309,14 @@ class GameDownloader:
 
                 upload_id = upload["id"]
                 file_name = upload["filename"]
-                file_size = upload.get("size")
+                expected_size = upload.get("size")
                 upload_is_external = upload["storage"] == "external"
 
                 logging.debug(
                     "Downloading '%s' (%d), %s",
                     file_name,
                     upload_id,
-                    f"{file_size} bytes" if file_size is not None else "unknown size",
+                    f"{expected_size} bytes" if expected_size is not None else "unknown size",
                 )
 
                 target_path = None if upload_is_external else os.path.join(paths["files"], file_name)
@@ -295,13 +330,31 @@ class GameDownloader:
                 if upload_is_external:
                     logging.debug("Found external download URL for %s: %s", target_url)
                     external_urls.append(target_url)
+                    continue
 
                 try:
-                    downloaded_file_size = os.stat(target_path).st_size
-                    if target_path is not None and file_size is not None and downloaded_file_size != file_size:
-                        errors.append(f"File size is {downloaded_file_size}, expected {file_size} for upload {upload}")
+                    downloaded_file_stat = os.stat(target_path)
                 except FileNotFoundError:
                     errors.append(f"Downloaded file not found for upload {upload}")
+                    continue
+
+                downloaded_size = downloaded_file_stat.st_size
+                content_size = self.get_decompressed_content_size(target_path)
+                logging.debug(
+                    "Size check for '%s': expected %s, downloaded %s, content %s",
+                    file_name,
+                    expected_size,
+                    downloaded_size,
+                    content_size,
+                )
+
+                # The API-reported size may describe either the served file or its
+                # unpacked content, so only flag a mismatch when neither agrees.
+                if expected_size is not None and expected_size not in (downloaded_size, content_size):
+                    errors.append(
+                        f"Downloaded file size is {downloaded_size} (content {content_size}), "
+                        f"expected {expected_size} for upload {upload}"
+                    )
 
             logging.debug("Done downloading files for %s", title)
         except Exception as e: