From 2e2db2f352f9e25627ead81f9e6addaad0b63bc6 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 16 Oct 2020 21:02:35 +0000 Subject: [PATCH] simplification: remove zimit user, su, and run chrome as root with --no-sandbox log exclusion regex --- Dockerfile | 7 ------- crawler.js | 5 ++++- zimit.py | 4 +--- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 667e245..56a4cbb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,17 +22,10 @@ RUN pip install git+https://github.com/webrecorder/pywb@patch-work COPY --from=chrome /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/ COPY --from=chrome /lib/x86_64-linux-gnu/libdbus* /lib/x86_64-linux-gnu/ -RUN useradd zimit --shell /bin/bash --create-home \ - && usermod -a -G sudo zimit \ - && echo 'ALL ALL = (ALL) NOPASSWD: ALL' >> /etc/sudoers \ - && echo 'zimit:secret' | chpasswd - WORKDIR /app ADD package.json /app/ -RUN chown -R zimit /app - RUN yarn install ADD config.yaml /app/ diff --git a/crawler.js b/crawler.js index 91da017..6656116 100644 --- a/crawler.js +++ b/crawler.js @@ -28,7 +28,8 @@ async function run(params) { // Chrome Flags, including proxy server const args = [ "--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically) - `--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}` + `--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`, + "--no-sandbox" ]; // prefix for direct capture via pywb @@ -313,6 +314,8 @@ async function main() { }) .argv; + console.log("Exclusions Regexes: ", params.exclude); + try { await run(params); process.exit(0); diff --git a/zimit.py b/zimit.py index d1773ca..da88823 100755 --- a/zimit.py +++ b/zimit.py @@ -96,7 +96,6 @@ def zimit(args=None): # make temp dir for this crawl and make it all writeable+all readable+all exec temp_root_dir = tempfile.mkdtemp(dir=zimit_args.output, prefix=".tmp") - os.chmod(temp_root_dir, stat.S_IROTH | stat.S_IWOTH | stat.S_IXOTH) if not zimit_args.keep: @@ -133,8 +132,7 @@ def zimit(args=None): print("") print("----------") print("running zimit driver: " + cmd_line) - su_cmd = ["su", "zimit", "-c", cmd_line] - subprocess.run(su_cmd, check=True) # nosec + subprocess.run(cmd_args, check=True) # nosec warc_files = glob.glob( os.path.join(temp_root_dir, "collections/capture/archive/*.warc.gz")