Merge pull request #262 from openzim/warc2zim_update

This commit is contained in:
benoit74 2024-01-15 07:59:05 +01:00 committed by GitHub
commit eab3d1f189
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 2 deletions

View File

@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed
- Adapt to new `warc2zim` code structure
- Using the `main` branch of `warc2zim` ⚠️ change before releasing!
### Added
- New `--build` parameter (optional) to specify the directory holding Browsertrix files; if not set, `--output`

View File

@ -8,7 +8,8 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/* \
# python setup (in venv not to conflict with browsertrix)
&& python3 -m venv /app/zimit \
&& /app/zimit/bin/python -m pip install --no-cache-dir 'requests==2.31.0' 'inotify==0.2.10' 'tld==0.13' 'warc2zim==1.5.4' \
&& /app/zimit/bin/python -m pip install --no-cache-dir 'requests==2.31.0' 'inotify==0.2.10' 'tld==0.13' \
'git+https://github.com/openzim/warc2zim@main#egg_name=warc2zim' \
# placeholder (default output location)
&& mkdir -p /output \
# disable chrome upgrade

View File

@ -25,7 +25,7 @@ import inotify
import inotify.adapters
import requests
from tld import get_fld
from warc2zim.main import warc2zim
from warc2zim.main import main as warc2zim
from zimscraperlib.uri import rebuild_uri
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15"