mirror of
https://github.com/DragoonAethis/itch-dl.git
synced 2024-12-21 02:21:52 +01:00
Guess the decompressed file size for downloaded Zip/Tar archives
For some uploads, the Itch API reports the expected decompressed file size, but serves the content as a compressed archive. Iterate over the members of Zip and Tar archives to compute their total decompressed size, and accept the download if either the on-disk size or the decompressed size matches the expected size. If anything is wrong with an archive, quietly skip the decompressed-size check. Fixes #21
This commit is contained in:
parent
00c3f79dbb
commit
06f75d4996
@ -3,6 +3,8 @@ import json
|
|||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import zipfile
|
||||||
|
import tarfile
|
||||||
from typing import List, Dict, TypedDict, Optional, Union
|
from typing import List, Dict, TypedDict, Optional, Union
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@ -219,6 +221,34 @@ class GameDownloader:
|
|||||||
"""Performs a request to download a given upload by its ID."""
|
"""Performs a request to download a given upload by its ID."""
|
||||||
return self.download_file(f"/uploads/{upload_id}/download", download_path, credentials)
|
return self.download_file(f"/uploads/{upload_id}/download", download_path, credentials)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_decompressed_content_size(target_path) -> Optional[int]:
    """Best-effort guess of the decompressed size of a Zip/Tar archive.

    For some files, the Itch API returns the decompressed file size, but
    serves compressed downloads. The archive may hold a single file in the
    root, or a container directory with files in it, so the sizes of all
    regular files are summed.

    Args:
        target_path: Path of the downloaded file to inspect.

    Returns:
        The total unpacked size in bytes of all regular files in the
        archive, or None if the file is not a Zip/Tar archive, contains no
        regular files, or cannot be read.
    """
    try:
        if zipfile.is_zipfile(target_path):
            with zipfile.ZipFile(target_path) as archive:
                # Zip files contain either directories or files. The format
                # is compression-aware: compress_size is packed, file_size
                # is unpacked.
                sizes = [info.file_size for info in archive.infolist() if not info.is_dir()]
            return sum(sizes) if sizes else None

        if tarfile.is_tarfile(target_path):
            with tarfile.open(target_path) as archive:
                # Tar files can contain any Unix "file", so regular files,
                # directories, symlinks, devices and FIFOs are fair game...
                # On the other hand, TAR is not compression-aware, so size
                # is always the unpacked size.
                sizes = [member.size for member in archive.getmembers() if member.isfile()]
            return sum(sizes) if sizes else None
    except (zipfile.BadZipFile, tarfile.TarError, OSError, EOFError):
        # This is only a heuristic used to validate downloads: a damaged,
        # truncated or unreadable archive means "size unknown", not a crash.
        return None

    return None
|
||||||
|
|
||||||
def download(self, url: str, skip_downloaded: bool = True):
|
def download(self, url: str, skip_downloaded: bool = True):
|
||||||
match = re.match(ITCH_GAME_URL_REGEX, url)
|
match = re.match(ITCH_GAME_URL_REGEX, url)
|
||||||
if not match:
|
if not match:
|
||||||
@ -274,14 +304,14 @@ class GameDownloader:
|
|||||||
|
|
||||||
upload_id = upload["id"]
|
upload_id = upload["id"]
|
||||||
file_name = upload["filename"]
|
file_name = upload["filename"]
|
||||||
file_size = upload.get("size")
|
expected_size = upload.get("size")
|
||||||
upload_is_external = upload["storage"] == "external"
|
upload_is_external = upload["storage"] == "external"
|
||||||
|
|
||||||
logging.debug(
|
logging.debug(
|
||||||
"Downloading '%s' (%d), %s",
|
"Downloading '%s' (%d), %s",
|
||||||
file_name,
|
file_name,
|
||||||
upload_id,
|
upload_id,
|
||||||
f"{file_size} bytes" if file_size is not None else "unknown size",
|
f"{expected_size} bytes" if expected_size is not None else "unknown size",
|
||||||
)
|
)
|
||||||
|
|
||||||
target_path = None if upload_is_external else os.path.join(paths["files"], file_name)
|
target_path = None if upload_is_external else os.path.join(paths["files"], file_name)
|
||||||
@ -295,13 +325,24 @@ class GameDownloader:
|
|||||||
if upload_is_external:
|
if upload_is_external:
|
||||||
logging.debug("Found external download URL for %s: %s", target_url)
|
logging.debug("Found external download URL for %s: %s", target_url)
|
||||||
external_urls.append(target_url)
|
external_urls.append(target_url)
|
||||||
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
downloaded_file_size = os.stat(target_path).st_size
|
downloaded_file_stat = os.stat(target_path)
|
||||||
if target_path is not None and file_size is not None and downloaded_file_size != file_size:
|
|
||||||
errors.append(f"File size is {downloaded_file_size}, expected {file_size} for upload {upload}")
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
errors.append(f"Downloaded file not found for upload {upload}")
|
errors.append(f"Downloaded file not found for upload {upload}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
downloaded_size = downloaded_file_stat.st_size
|
||||||
|
content_size = self.get_decompressed_content_size(target_path)
|
||||||
|
print("expected", expected_size, "downloaded", downloaded_size, "content", content_size)
|
||||||
|
|
||||||
|
if (
|
||||||
|
all(x is not None for x in (target_path, expected_size, downloaded_size))
|
||||||
|
and downloaded_size != expected_size
|
||||||
|
and content_size != expected_size
|
||||||
|
):
|
||||||
|
errors.append(f"Downloaded file size is {downloaded_size} (content {content_size}), expected {expected_size} for upload {upload}")
|
||||||
|
|
||||||
logging.debug("Done downloading files for %s", title)
|
logging.debug("Done downloading files for %s", title)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
Reference in New Issue
Block a user