diff --git a/itch_dl/cli.py b/itch_dl/cli.py
index cead23f..ba2a8bb 100644
--- a/itch_dl/cli.py
+++ b/itch_dl/cli.py
@@ -54,6 +54,12 @@ def parse_args() -> argparse.Namespace:
                         help="filter downloaded files with a shell-style glob/fnmatch (unmatched files are skipped)")
     parser.add_argument("--filter-files-regex", metavar="regex", default=None,
                         help="filter downloaded files with a Python regex (unmatched files are skipped)")
+
+    parser.add_argument("--filter-urls-glob", metavar="glob", default=None,
+                        help="filter itch URLs with a shell-style glob/fnmatch (unmatched URLs are skipped)")
+    parser.add_argument("--filter-urls-regex", metavar="regex", default=None,
+                        help="filter itch URLs with a Python regex (unmatched URLs are skipped)")
+
     parser.add_argument("--verbose", action="store_true",
                         help="print verbose logs")
 
@@ -89,8 +95,10 @@ def run() -> int:
         )
 
     jobs = get_jobs_for_url_or_path(url_or_path, settings)
-    jobs = list(set(jobs))  # Deduplicate, just in case...
-    logging.info("Found %d URL(s).", len(jobs))
+    logging.info("Found %d URL(s) total.", len(jobs))
+
+    jobs = preprocess_job_urls(jobs, settings)
+    logging.info("Will process %d URL(s) after filtering and deduplication.", len(jobs))
 
     if len(jobs) == 0:
         sys.exit("No URLs to download.")
diff --git a/itch_dl/config.py b/itch_dl/config.py
index 607ff21..de149f4 100644
--- a/itch_dl/config.py
+++ b/itch_dl/config.py
@@ -27,6 +27,9 @@ class Settings:
     filter_files_glob: str | None = None
     filter_files_regex: str | None = None
 
+    filter_urls_glob: str | None = None
+    filter_urls_regex: str | None = None
+
     verbose: bool = False
 
 
diff --git a/itch_dl/handlers.py b/itch_dl/handlers.py
index d5da373..8267598 100644
--- a/itch_dl/handlers.py
+++ b/itch_dl/handlers.py
@@ -7,7 +7,7 @@ from http.client import responses
 from bs4 import BeautifulSoup
 
 from .api import ItchApiClient
-from .utils import ItchDownloadError, get_int_after_marker_in_json
+from .utils import ItchDownloadError, get_int_after_marker_in_json, should_skip_item_by_glob, should_skip_item_by_regex
 from .consts import ITCH_API, ITCH_BASE, ITCH_URL, ITCH_BROWSER_TYPES
 from .config import Settings
 from .keys import get_owned_games
@@ -251,3 +251,22 @@ def get_jobs_for_url_or_path(path_or_url: str, settings: Settings) -> list[str]:
         return get_jobs_for_path(path_or_url)
     else:
         raise NotImplementedError(f"Cannot handle path or URL: {path_or_url}")
+
+
+def preprocess_job_urls(jobs: list[str], settings: Settings) -> list[str]:
+    """Strip, deduplicate and filter job URLs, keeping first-seen order.
+
+    URLs rejected by the settings' filter_urls_glob or filter_urls_regex
+    filters are dropped; a filter that is not set rejects nothing.
+    """
+    # A dict doubles as an insertion-ordered set, so jobs keep the order they
+    # were found in (a plain set would shuffle them between runs). Deduplicating
+    # before filtering also logs each rejected URL only once.
+    unique_jobs = dict.fromkeys(job.strip() for job in jobs)
+
+    return [
+        job
+        for job in unique_jobs
+        if not should_skip_item_by_glob("URL", job, settings.filter_urls_glob)
+        and not should_skip_item_by_regex("URL", job, settings.filter_urls_regex)
+    ]
diff --git a/itch_dl/utils.py b/itch_dl/utils.py
index fd49885..145563a 100644
--- a/itch_dl/utils.py
+++ b/itch_dl/utils.py
@@ -1,4 +1,8 @@
 import re
+import logging
+from fnmatch import fnmatch
+
+from typing import Literal
 
 
 class ItchDownloadError(Exception):
@@ -31,3 +35,28 @@ def get_int_after_marker_in_json(text: str, marker: str, key: str) -> int | None
         return None
 
     return int(found_ints[0])
+
+
+def should_skip_item_by_glob(kind: Literal["File", "URL"], item: str, glob: str | None) -> bool:
+    """Return True when a glob filter is set and `item` does not match it.
+
+    A filter that is None or empty never skips anything.
+    """
+    if glob and not fnmatch(item, glob):
+        logging.info("%s '%s' does not match the glob filter '%s', skipping", kind, item, glob)
+        return True
+
+    return False
+
+
+def should_skip_item_by_regex(kind: Literal["File", "URL"], item: str, regex: str | None) -> bool:
+    """Return True when a regex filter is set and `item` does not match it.
+
+    The pattern must match the whole item (re.fullmatch), not just a prefix.
+    A filter that is None or empty never skips anything.
+    """
+    if regex and not re.fullmatch(regex, item):
+        logging.info("%s '%s' does not match the regex filter '%s', skipping", kind, item, regex)
+        return True
+
+    return False