Upgrade to Python 3.13, Browsertrix Crawler 1.5.0 and others

This commit is contained in:
benoit74 2025-02-04 15:12:49 +00:00
parent 0cb84f2126
commit 0f136d2f2f
No known key found for this signature in database
GPG Key ID: B89606434FC7B530
6 changed files with 34 additions and 27 deletions

View File

@ -2,20 +2,20 @@
# See https://pre-commit.com/hooks.html for more hooks # See https://pre-commit.com/hooks.html for more hooks
repos: repos:
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0 rev: v5.0.0
hooks: hooks:
- id: trailing-whitespace - id: trailing-whitespace
- id: end-of-file-fixer - id: end-of-file-fixer
- repo: https://github.com/psf/black - repo: https://github.com/psf/black
rev: "24.10.0" rev: "25.1.0"
hooks: hooks:
- id: black - id: black
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9 rev: v0.9.4
hooks: hooks:
- id: ruff - id: ruff
- repo: https://github.com/RobertCraigie/pyright-python - repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.383 rev: v1.1.393
hooks: hooks:
- id: pyright - id: pyright
name: pyright (system) name: pyright (system)

View File

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Changed
- Upgrade to browsertrix crawler 1.5.0, Python 3.13 and others (#462)
## [2.1.7] - 2024-01-10 ## [2.1.7] - 2024-01-10
### Changed ### Changed

View File

@ -1,13 +1,16 @@
FROM webrecorder/browsertrix-crawler:1.4.2 FROM webrecorder/browsertrix-crawler:1.5.0
LABEL org.opencontainers.image.source=https://github.com/openzim/zimit LABEL org.opencontainers.image.source=https://github.com/openzim/zimit
# add deadsnakes ppa for latest Python on Ubuntu
RUN add-apt-repository ppa:deadsnakes/ppa -y
RUN apt-get update \ RUN apt-get update \
&& apt-get install -qqy --no-install-recommends \ && apt-get install -qqy --no-install-recommends \
libmagic1 \ libmagic1 \
python3.12-venv \ python3.13-venv \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
# python setup (in venv not to conflict with browsertrix) # python setup (in venv not to conflict with browsertrix)
&& python3.12 -m venv /app/zimit \ && python3.13 -m venv /app/zimit \
# placeholder (default output location) # placeholder (default output location)
&& mkdir -p /output \ && mkdir -p /output \
# disable chrome upgrade # disable chrome upgrade

View File

@ -1,10 +1,10 @@
[build-system] [build-system]
requires = ["hatchling", "hatch-openzim==0.2.0"] requires = ["hatchling", "hatch-openzim"]
build-backend = "hatchling.build" build-backend = "hatchling.build"
[project] [project]
name = "zimit" name = "zimit"
requires-python = ">=3.12,<3.13" requires-python = ">=3.13,<3.14"
description = "Make ZIM file from any website through crawling" description = "Make ZIM file from any website through crawling"
readme = "README.md" readme = "README.md"
dependencies = [ dependencies = [
@ -26,20 +26,20 @@ scripts = [
"invoke==2.2.0", "invoke==2.2.0",
] ]
lint = [ lint = [
"black==24.10.0", "black==25.1.0",
"ruff==0.6.9", "ruff==0.9.4",
] ]
check = [ check = [
"pyright==1.1.383", "pyright==1.1.393",
] ]
test = [ test = [
"pytest==8.3.3", "pytest==8.3.4",
"coverage==7.6.1", "coverage==7.6.10",
] ]
dev = [ dev = [
"pre-commit==4.0.0", "pre-commit==4.1.0",
"debugpy==1.8.6", "debugpy==1.8.12",
"selenium==4.25.0", # used in daily tests, convenient for dev purpose (autocompletion) "selenium==4.28.1", # used in daily tests, convenient for dev purpose (autocompletion)
"zimit[scripts]", "zimit[scripts]",
"zimit[lint]", "zimit[lint]",
"zimit[test]", "zimit[test]",
@ -95,10 +95,10 @@ all = "inv checkall --args '{args}'"
[tool.black] [tool.black]
line-length = 88 line-length = 88
target-version = ['py312'] target-version = ['py313']
[tool.ruff] [tool.ruff]
target-version = "py312" target-version = "py313"
line-length = 88 line-length = 88
src = ["src"] src = ["src"]
@ -221,5 +221,5 @@ exclude_lines = [
include = ["src", "tests", "tasks.py"] include = ["src", "tests", "tasks.py"]
exclude = [".env/**", ".venv/**"] exclude = [".env/**", ".venv/**"]
extraPaths = ["src"] extraPaths = ["src"]
pythonVersion = "3.12" pythonVersion = "3.13"
typeCheckingMode="basic" typeCheckingMode="basic"

View File

@ -1,5 +1,5 @@
# Let's extract kiwix-tools as usual on alpine temporary build container # Let's extract kiwix-tools as usual on alpine temporary build container
FROM alpine:3.18 as kiwix-serve FROM alpine:3.21 as kiwix-serve
LABEL org.opencontainers.image.source https://github.com/openzim/kiwix-tools LABEL org.opencontainers.image.source https://github.com/openzim/kiwix-tools
# TARGETPLATFORM is injected by docker build # TARGETPLATFORM is injected by docker build
@ -30,7 +30,7 @@ RUN set -e && \
curl -k -L $url | tar -xz -C /kiwix-serve --strip-components 1 curl -k -L $url | tar -xz -C /kiwix-serve --strip-components 1
# Build real "workload" container # Build real "workload" container
FROM python:3.12-slim-bookworm FROM python:3.13-slim-bookworm
# Add kiwix-serve # Add kiwix-serve
COPY --from=kiwix-serve /kiwix-serve /usr/local/bin COPY --from=kiwix-serve /kiwix-serve /usr/local/bin
@ -70,6 +70,6 @@ RUN rm /tmp/chrome-linux64.zip /tmp/chromedriver-linux64.zip /tmp/versions.json
RUN \ RUN \
python -m pip install --no-cache-dir -U \ python -m pip install --no-cache-dir -U \
pip \ pip \
selenium==4.23.0 \ selenium==4.28.1 \
pytest==8.2.2 \ pytest==8.3.4 \
&& mkdir -p /work && mkdir -p /work

View File

@ -17,7 +17,7 @@ def test_zim_main_page():
was a redirect to https was a redirect to https
Ensure main page is the redirected page""" Ensure main page is the redirected page"""
main_entry = Archive("/output/tests_en_onepage.zim").main_entry main_entry = Archive(Path("/output/tests_en_onepage.zim")).main_entry
assert main_entry.is_redirect assert main_entry.is_redirect
assert ( assert (
main_entry.get_redirect_entry().path main_entry.get_redirect_entry().path
@ -28,7 +28,7 @@ def test_zim_main_page():
def test_zim_scraper(): def test_zim_scraper():
"""Check content of scraper metadata""" """Check content of scraper metadata"""
zim_fh = Archive("/output/tests_en_onepage.zim") zim_fh = Archive(Path("/output/tests_en_onepage.zim"))
scraper = zim_fh.get_text_metadata("Scraper") scraper = zim_fh.get_text_metadata("Scraper")
assert "zimit " in scraper assert "zimit " in scraper
assert "warc2zim " in scraper assert "warc2zim " in scraper
@ -37,7 +37,7 @@ def test_zim_scraper():
def test_files_list(): def test_files_list():
"""Check that expected files are present in the ZIM at proper path""" """Check that expected files are present in the ZIM at proper path"""
zim_fh = Archive("/output/tests_en_onepage.zim") zim_fh = Archive(Path("/output/tests_en_onepage.zim"))
for expected_entry in [ for expected_entry in [
"_zim_static/__wb_module_decl.js", "_zim_static/__wb_module_decl.js",
"_zim_static/wombat.js", "_zim_static/wombat.js",