Merge pull request #262 from openzim/warc2zim_update

This commit is contained in:
benoit74 2024-01-15 07:59:05 +01:00 committed by GitHub
commit eab3d1f189
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 2 deletions

View File

@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Changed
- Adapt to new `warc2zim` code structure
- Using the `main` branch of warc2zim ⚠️ change to a pinned release version before releasing!
### Added ### Added
- New `--build` parameter (optional) to specify the directory holding Browsertrix files ; if not set, `--output` - New `--build` parameter (optional) to specify the directory holding Browsertrix files ; if not set, `--output`

View File

@ -8,7 +8,8 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
# python setup (in venv not to conflict with browsertrix) # python setup (in venv not to conflict with browsertrix)
&& python3 -m venv /app/zimit \ && python3 -m venv /app/zimit \
&& /app/zimit/bin/python -m pip install --no-cache-dir 'requests==2.31.0' 'inotify==0.2.10' 'tld==0.13' 'warc2zim==1.5.4' \ && /app/zimit/bin/python -m pip install --no-cache-dir 'requests==2.31.0' 'inotify==0.2.10' 'tld==0.13' \
'git+https://github.com/openzim/warc2zim@main#egg_name=warc2zim' \
# placeholder (default output location) # placeholder (default output location)
&& mkdir -p /output \ && mkdir -p /output \
# disable chrome upgrade # disable chrome upgrade

View File

@ -25,7 +25,7 @@ import inotify
import inotify.adapters import inotify.adapters
import requests import requests
from tld import get_fld from tld import get_fld
from warc2zim.main import warc2zim from warc2zim.main import main as warc2zim
from zimscraperlib.uri import rebuild_uri from zimscraperlib.uri import rebuild_uri
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15" DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15"