# Mirror of https://github.com/openzim/zimit.git
# Synced 2025-08-03 10:16:08 -04:00 (original listing: Docker, 50 lines, 1.8 KiB)
# Base image: the browsertrix crawler, pinned to an exact release tag.
FROM webrecorder/browsertrix-crawler:1.5.0

# OCI metadata pointing back at the source repository.
LABEL org.opencontainers.image.source="https://github.com/openzim/zimit"
|
|
|
|
# Register the deadsnakes PPA, which provides recent Python releases for Ubuntu.
RUN add-apt-repository --yes ppa:deadsnakes/ppa
|
|
|
|
# System packages, Python venv, default output dir, Chrome repo settings,
# ad/tracker blocklist and the entrypoint wrapper, all built in a single layer.
RUN apt-get update \
    && apt-get install -qqy --no-install-recommends \
        libmagic1 \
        python3.13-venv \
    && rm -rf /var/lib/apt/lists/* \
    # python setup (in venv not to conflict with browsertrix)
    && python3.13 -m venv /app/zimit \
    # placeholder (default output location)
    && mkdir -p /output \
    # disable chrome upgrade
    && printf "repo_add_once=\"false\"\nrepo_reenable_on_distupgrade=\"false\"\n" > /etc/default/google-chrome \
    # download list of bad domains to filter-out. intentionally ran post-install
    # so it's not cached in earlier layers (url stays same but content updated)
    && mkdir -p /tmp/ads \
    && cd /tmp/ads \
    # -f makes curl exit non-zero on an HTTP error, so a failed download aborts
    # the build instead of an error page being concatenated into the blocklist
    && curl -fL -O https://hosts.anudeep.me/mirror/adservers.txt \
    && curl -fL -O https://hosts.anudeep.me/mirror/CoinMiner.txt \
    && curl -fL -O https://hosts.anudeep.me/mirror/facebook.txt \
    && cat ./*.txt > /etc/blocklist.txt \
    && rm ./*.txt \
    # entrypoint wrapper: append the blocklist to /etc/hosts, then exec the
    # requested command in place of the shell
    && printf '#!/bin/sh\ncat /etc/blocklist.txt >> /etc/hosts\nexec "$@"' > /usr/local/bin/entrypoint.sh \
    && chmod +x /usr/local/bin/entrypoint.sh
|
|
|
|
# Bring in only the packaging metadata first: the project definition, the
# README and the version module (__about__.py) that pyproject.toml depends on.
COPY pyproject.toml README.md /src/
COPY src/zimit/__about__.py /src/src/zimit/
|
|
|
|
# Install the Python dependencies into the venv. Only the packaging metadata
# is present under /src at this point, not the full source tree.
RUN . /app/zimit/bin/activate \
    && python -m pip install --no-cache-dir /src
|
|
|
|
# Now add the full source tree and the top-level markdown files.
COPY src/ /src/src/
COPY *.md /src/
|
|
|
|
# Install the project from the complete sources, link the venv's zimit and
# warc2zim commands into /usr/bin, then remove the sources from the image.
RUN . /app/zimit/bin/activate \
    && python -m pip install --no-cache-dir /src \
    # one ln call: each target is linked under its own basename in /usr/bin
    && ln -s /app/zimit/bin/zimit /app/zimit/bin/warc2zim /usr/bin/ \
    && chmod +x /usr/bin/zimit \
    && rm -rf /src
|
|
|
|
# Run every command through the wrapper script, which appends the blocklist to
# /etc/hosts and then execs the command. The absolute path is used so startup
# does not depend on a PATH lookup.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

# Default command when none is given: show zimit's usage.
CMD ["zimit", "--help"]
|