forked from Mirrors/itch-dl
Add filtering job URLs by regex/globs
This commit is contained in:
@@ -54,6 +54,12 @@ def parse_args() -> argparse.Namespace:
|
|||||||
help="filter downloaded files with a shell-style glob/fnmatch (unmatched files are skipped)")
|
help="filter downloaded files with a shell-style glob/fnmatch (unmatched files are skipped)")
|
||||||
parser.add_argument("--filter-files-regex", metavar="regex", default=None,
|
parser.add_argument("--filter-files-regex", metavar="regex", default=None,
|
||||||
help="filter downloaded files with a Python regex (unmatched files are skipped)")
|
help="filter downloaded files with a Python regex (unmatched files are skipped)")
|
||||||
|
|
||||||
|
parser.add_argument("--filter-urls-glob", metavar="glob", default=None,
|
||||||
|
help="filter itch URLs with a shell-style glob/fnmatch (unmatched URLs are skipped)")
|
||||||
|
parser.add_argument("--filter-urls-regex", metavar="regex", default=None,
|
||||||
|
help="filter itch URLs with a Python regex (unmatched URLs are skipped)")
|
||||||
|
|
||||||
parser.add_argument("--verbose", action="store_true",
|
parser.add_argument("--verbose", action="store_true",
|
||||||
help="print verbose logs")
|
help="print verbose logs")
|
||||||
|
|
||||||
@@ -89,8 +95,10 @@ def run() -> int:
|
|||||||
)
|
)
|
||||||
|
|
||||||
jobs = get_jobs_for_url_or_path(url_or_path, settings)
|
jobs = get_jobs_for_url_or_path(url_or_path, settings)
|
||||||
jobs = list(set(jobs)) # Deduplicate, just in case...
|
logging.info("Found %d URL(s) total.", len(jobs))
|
||||||
logging.info("Found %d URL(s).", len(jobs))
|
|
||||||
|
jobs = preprocess_job_urls(jobs, settings)
|
||||||
|
logging.info("Will process %d URL(s) after filtering and deduplication.", len(jobs))
|
||||||
|
|
||||||
if len(jobs) == 0:
|
if len(jobs) == 0:
|
||||||
sys.exit("No URLs to download.")
|
sys.exit("No URLs to download.")
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ class Settings:
|
|||||||
filter_files_glob: str | None = None
|
filter_files_glob: str | None = None
|
||||||
filter_files_regex: str | None = None
|
filter_files_regex: str | None = None
|
||||||
|
|
||||||
|
filter_urls_glob: str | None = None
|
||||||
|
filter_urls_regex: str | None = None
|
||||||
|
|
||||||
verbose: bool = False
|
verbose: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from http.client import responses
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from .api import ItchApiClient
|
from .api import ItchApiClient
|
||||||
from .utils import ItchDownloadError, get_int_after_marker_in_json
|
from .utils import ItchDownloadError, get_int_after_marker_in_json, should_skip_item_by_glob, should_skip_item_by_regex
|
||||||
from .consts import ITCH_API, ITCH_BASE, ITCH_URL, ITCH_BROWSER_TYPES
|
from .consts import ITCH_API, ITCH_BASE, ITCH_URL, ITCH_BROWSER_TYPES
|
||||||
from .config import Settings
|
from .config import Settings
|
||||||
from .keys import get_owned_games
|
from .keys import get_owned_games
|
||||||
@@ -251,3 +251,19 @@ def get_jobs_for_url_or_path(path_or_url: str, settings: Settings) -> list[str]:
|
|||||||
return get_jobs_for_path(path_or_url)
|
return get_jobs_for_path(path_or_url)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"Cannot handle path or URL: {path_or_url}")
|
raise NotImplementedError(f"Cannot handle path or URL: {path_or_url}")
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_job_urls(jobs: list[str], settings: Settings) -> list[str]:
    """Strip, filter and deduplicate job URLs.

    Every URL has its surrounding whitespace removed, is dropped when it
    fails the glob filter (``settings.filter_urls_glob``) or the regex
    filter (``settings.filter_urls_regex``), and is otherwise kept once.

    Args:
        jobs: Candidate job URLs, possibly containing duplicates.
        settings: Settings object carrying the optional URL filters.

    Returns:
        The surviving URLs without duplicates, in first-seen order.
    """
    # A dict serves as an ordered set: keys are unique and keep insertion
    # order, so the result is deterministic across runs (a plain set of
    # strings iterates in a hash-dependent, per-process order).
    cleaned_jobs: dict[str, None] = {}

    for job in jobs:
        job = job.strip()

        if should_skip_item_by_glob("URL", job, settings.filter_urls_glob):
            continue

        if should_skip_item_by_regex("URL", job, settings.filter_urls_regex):
            continue

        cleaned_jobs[job] = None

    return list(cleaned_jobs)
|
||||||
|
|||||||
@@ -1,4 +1,8 @@
|
|||||||
import re
|
import re
|
||||||
|
import logging
|
||||||
|
from fnmatch import fnmatch
|
||||||
|
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
|
||||||
class ItchDownloadError(Exception):
|
class ItchDownloadError(Exception):
|
||||||
@@ -31,3 +35,19 @@ def get_int_after_marker_in_json(text: str, marker: str, key: str) -> int | None
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
return int(found_ints[0])
|
return int(found_ints[0])
|
||||||
|
|
||||||
|
|
||||||
|
def should_skip_item_by_glob(kind: Literal['File'] | Literal['URL'], item: str, glob: str):
|
||||||
|
if glob and not fnmatch(item, glob):
|
||||||
|
logging.info("%s '%s' does not match the glob filter '%s', skipping", kind, item, glob)
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def should_skip_item_by_regex(kind: Literal['File'] | Literal['URL'], item: str, regex: str):
|
||||||
|
if regex and not re.fullmatch(regex, item):
|
||||||
|
logging.info("%s '%s' does not match the regex filter '%s', skipping", kind, item, regex)
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|||||||
Reference in New Issue
Block a user