mirror of
https://github.com/DragoonAethis/itch-dl.git
synced 2025-04-21 03:51:18 +02:00
Add filtering job URLs by regex/globs
This commit is contained in:
parent
5ab0dc0309
commit
a092532192
@ -54,6 +54,12 @@ def parse_args() -> argparse.Namespace:
|
||||
help="filter downloaded files with a shell-style glob/fnmatch (unmatched files are skipped)")
|
||||
parser.add_argument("--filter-files-regex", metavar="regex", default=None,
|
||||
help="filter downloaded files with a Python regex (unmatched files are skipped)")
|
||||
|
||||
parser.add_argument("--filter-urls-glob", metavar="glob", default=None,
|
||||
help="filter itch URLs with a shell-style glob/fnmatch (unmatched URLs are skipped)")
|
||||
parser.add_argument("--filter-urls-regex", metavar="regex", default=None,
|
||||
help="filter itch URLs with a Python regex (unmatched URLs are skipped)")
|
||||
|
||||
parser.add_argument("--verbose", action="store_true",
|
||||
help="print verbose logs")
|
||||
|
||||
@ -89,8 +95,10 @@ def run() -> int:
|
||||
)
|
||||
|
||||
jobs = get_jobs_for_url_or_path(url_or_path, settings)
|
||||
jobs = list(set(jobs)) # Deduplicate, just in case...
|
||||
logging.info("Found %d URL(s).", len(jobs))
|
||||
logging.info("Found %d URL(s) total.", len(jobs))
|
||||
|
||||
jobs = preprocess_job_urls(jobs, settings)
|
||||
logging.info("Will process %d URL(s) after filtering and deduplication.", len(jobs))
|
||||
|
||||
if len(jobs) == 0:
|
||||
sys.exit("No URLs to download.")
|
||||
|
@ -27,6 +27,9 @@ class Settings:
|
||||
filter_files_glob: str | None = None
|
||||
filter_files_regex: str | None = None
|
||||
|
||||
filter_urls_glob: str | None = None
|
||||
filter_urls_regex: str | None = None
|
||||
|
||||
verbose: bool = False
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@ from http.client import responses
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .api import ItchApiClient
|
||||
from .utils import ItchDownloadError, get_int_after_marker_in_json
|
||||
from .utils import ItchDownloadError, get_int_after_marker_in_json, should_skip_item_by_glob, should_skip_item_by_regex
|
||||
from .consts import ITCH_API, ITCH_BASE, ITCH_URL, ITCH_BROWSER_TYPES
|
||||
from .config import Settings
|
||||
from .keys import get_owned_games
|
||||
@ -251,3 +251,19 @@ def get_jobs_for_url_or_path(path_or_url: str, settings: Settings) -> list[str]:
|
||||
return get_jobs_for_path(path_or_url)
|
||||
else:
|
||||
raise NotImplementedError(f"Cannot handle path or URL: {path_or_url}")
|
||||
|
||||
|
||||
def preprocess_job_urls(jobs: list[str], settings: Settings) -> list[str]:
    """Strip, filter and deduplicate job URLs before they are processed.

    Every URL has its surrounding whitespace removed, is dropped when it
    fails the ``filter_urls_glob`` or ``filter_urls_regex`` setting, and is
    kept only once even if it appears several times in ``jobs``.

    Args:
        jobs: Candidate job URLs.
        settings: Settings object providing the optional URL filters.

    Returns:
        The unique URLs that passed both filters, in the order in which they
        first appeared in ``jobs``.
    """
    # A dict serves as an insertion-ordered set here: a plain set would hand
    # the URLs back in an arbitrary order unrelated to the input order.
    cleaned_jobs: dict[str, None] = {}

    for job in jobs:
        job = job.strip()

        # The glob filter is checked first; a URL rejected by it is never
        # tested (or logged) against the regex filter.
        if should_skip_item_by_glob("URL", job, settings.filter_urls_glob):
            continue

        if should_skip_item_by_regex("URL", job, settings.filter_urls_regex):
            continue

        cleaned_jobs[job] = None

    return list(cleaned_jobs)
|
||||
|
@ -1,4 +1,8 @@
|
||||
import re
|
||||
import logging
|
||||
from fnmatch import fnmatch
|
||||
|
||||
from typing import Literal
|
||||
|
||||
|
||||
class ItchDownloadError(Exception):
|
||||
@ -31,3 +35,19 @@ def get_int_after_marker_in_json(text: str, marker: str, key: str) -> int | None
|
||||
return None
|
||||
|
||||
return int(found_ints[0])
|
||||
|
||||
|
||||
def should_skip_item_by_glob(kind: Literal['File'] | Literal['URL'], item: str, glob: str):
|
||||
if glob and not fnmatch(item, glob):
|
||||
logging.info("%s '%s' does not match the glob filter '%s', skipping", kind, item, glob)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def should_skip_item_by_regex(kind: Literal['File'] | Literal['URL'], item: str, regex: str):
|
||||
if regex and not re.fullmatch(regex, item):
|
||||
logging.info("%s '%s' does not match the regex filter '%s', skipping", kind, item, regex)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
Loading…
x
Reference in New Issue
Block a user