diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f91d0b..b362d62 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,20 +2,20 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - repo: https://github.com/psf/black - rev: "24.10.0" + rev: "25.1.0" hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.9.4 hooks: - id: ruff - repo: https://github.com/RobertCraigie/pyright-python - rev: v1.1.383 + rev: v1.1.393 hooks: - id: pyright name: pyright (system) diff --git a/CHANGELOG.md b/CHANGELOG.md index c437da8..4033a33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Upgrade to browsertrix crawler 1.5.0, Python 3.13 and others (#462) + ## [2.1.7] - 2024-01-10 ### Changed diff --git a/Dockerfile b/Dockerfile index 9d88f45..d2854dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,16 @@ -FROM webrecorder/browsertrix-crawler:1.4.2 +FROM webrecorder/browsertrix-crawler:1.5.0 LABEL org.opencontainers.image.source=https://github.com/openzim/zimit +# add deadsnakes ppa for latest Python on Ubuntu +RUN add-apt-repository ppa:deadsnakes/ppa -y + RUN apt-get update \ && apt-get install -qqy --no-install-recommends \ libmagic1 \ - python3.12-venv \ + python3.13-venv \ && rm -rf /var/lib/apt/lists/* \ # python setup (in venv not to conflict with browsertrix) - && python3.12 -m venv /app/zimit \ + && python3.13 -m venv /app/zimit \ # placeholder (default output location) && mkdir -p /output \ # disable chrome upgrade diff --git a/pyproject.toml b/pyproject.toml index ffedf8b..e4e7696 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [build-system] -requires = ["hatchling", "hatch-openzim==0.2.0"] +requires = ["hatchling", "hatch-openzim"] build-backend = "hatchling.build" [project] name = "zimit" -requires-python = ">=3.12,<3.13" +requires-python = ">=3.13,<3.14" description = "Make ZIM file from any website through crawling" readme = "README.md" dependencies = [ @@ -26,20 +26,20 @@ scripts = [ "invoke==2.2.0", ] lint = [ - "black==24.10.0", - "ruff==0.6.9", + "black==25.1.0", + "ruff==0.9.4", ] check = [ - "pyright==1.1.383", + "pyright==1.1.393", ] test = [ - "pytest==8.3.3", - "coverage==7.6.1", + "pytest==8.3.4", + "coverage==7.6.10", ] dev = [ - "pre-commit==4.0.0", - "debugpy==1.8.6", - "selenium==4.25.0", # used in daily tests, convenient for dev purpose (autocompletion) + "pre-commit==4.1.0", + "debugpy==1.8.12", + "selenium==4.28.1", # used in daily tests, convenient for dev purpose (autocompletion) "zimit[scripts]", "zimit[lint]", "zimit[test]", @@ -95,10 +95,10 @@ all = "inv checkall --args '{args}'" [tool.black] line-length = 88 -target-version = ['py312'] +target-version = ['py313'] [tool.ruff] -target-version = "py312" +target-version = "py313" line-length = 88 src = ["src"] @@ -221,5 +221,5 @@ exclude_lines = [ include = ["src", "tests", "tasks.py"] exclude = [".env/**", ".venv/**"] extraPaths = ["src"] -pythonVersion = "3.12" +pythonVersion = "3.13" typeCheckingMode="basic" diff --git a/tests-daily/Dockerfile b/tests-daily/Dockerfile index f6118fe..22d45ef 100644 --- a/tests-daily/Dockerfile +++ b/tests-daily/Dockerfile @@ -1,5 +1,5 @@ # Let's extract kiwix-tools as usual on alpine temporary build container -FROM alpine:3.18 as kiwix-serve +FROM alpine:3.21 as kiwix-serve LABEL org.opencontainers.image.source https://github.com/openzim/kiwix-tools # TARGETPLATFORM is injected by docker build @@ -30,7 +30,7 @@ RUN set -e && \ curl -k -L $url | tar -xz -C /kiwix-serve --strip-components 1 # Build real "workload" container -FROM python:3.12-slim-bookworm +FROM python:3.13-slim-bookworm # Add kiwix-serve COPY --from=kiwix-serve /kiwix-serve /usr/local/bin @@ -70,6 +70,6 @@ RUN rm /tmp/chrome-linux64.zip /tmp/chromedriver-linux64.zip /tmp/versions.json RUN \ python -m pip install --no-cache-dir -U \ pip \ - selenium==4.23.0 \ - pytest==8.2.2 \ + selenium==4.28.1 \ + pytest==8.3.4 \ && mkdir -p /work diff --git a/tests-integration/integration.py b/tests-integration/integration.py index 9d37b0f..b757e3d 100644 --- a/tests-integration/integration.py +++ b/tests-integration/integration.py @@ -17,7 +17,7 @@ def test_zim_main_page(): was a redirect to https Ensure main page is the redirected page""" - main_entry = Archive("/output/tests_en_onepage.zim").main_entry + main_entry = Archive(Path("/output/tests_en_onepage.zim")).main_entry assert main_entry.is_redirect assert ( main_entry.get_redirect_entry().path @@ -28,7 +28,7 @@ def test_zim_main_page(): def test_zim_scraper(): """Check content of scraper metadata""" - zim_fh = Archive("/output/tests_en_onepage.zim") + zim_fh = Archive(Path("/output/tests_en_onepage.zim")) scraper = zim_fh.get_text_metadata("Scraper") assert "zimit " in scraper assert "warc2zim " in scraper @@ -37,7 +37,7 @@ def test_zim_scraper(): def test_files_list(): """Check that expected files are present in the ZIM at proper path""" - zim_fh = Archive("/output/tests_en_onepage.zim") + zim_fh = Archive(Path("/output/tests_en_onepage.zim")) for expected_entry in [ "_zim_static/__wb_module_decl.js", "_zim_static/wombat.js", @@ -87,11 +87,12 @@ def test_user_agent(): def test_stats_output(): assert json.loads(Path("/output/crawl.json").read_bytes()) == { - "crawled": 35, + "crawled": 17, "pending": 0, "pendingPages": [], - "total": 35, - "failed": 18, + "total": 17, + "failed": 1, + "failedWillRetry": 17, "limit": {"max": 0, "hit": False}, }