From f2522459833249076edb77292db1ef7979f6cb1d Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 22 Sep 2020 06:09:33 +0000 Subject: [PATCH] try using regular puppeteer, only copy deps from chrome image pywb: increase uwsgi processes, disable autoindex/autofetch for better perf --- Dockerfile | 12 ++++++------ config.yaml | 4 ++-- index.js | 6 +++--- package.json | 2 +- uwsgi.ini | 3 ++- yarn.lock | 6 +++--- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index af6955c..d51dfb6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,16 +10,16 @@ ENV PROXY_HOST=localhost \ RUN pip install pywb uwsgi warc2zim -COPY --from=chrome /opt/google/chrome/ /opt/google/chrome/ +#COPY --from=chrome /opt/google/chrome/ /opt/google/chrome/ -COPY --from=chrome /app/ /browser/ +#COPY --from=chrome /app/ /browser/ COPY --from=chrome /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/ COPY --from=chrome /lib/x86_64-linux-gnu/libdbus* /lib/x86_64-linux-gnu/ -COPY --from=chrome /usr/bin/run_forever /usr/bin/ -COPY --from=chrome /usr/bin/wget /usr/bin/ -COPY --from=chrome /usr/bin/certutil /usr/bin/ +#COPY --from=chrome /usr/bin/run_forever /usr/bin/ +#COPY --from=chrome /usr/bin/wget /usr/bin/ +#COPY --from=chrome /usr/bin/certutil /usr/bin/ -RUN ln -s /opt/google/chrome/google-chrome /usr/bin/google-chrome +#RUN ln -s /opt/google/chrome/google-chrome /usr/bin/google-chrome RUN useradd zimit --shell /bin/bash --create-home \ && usermod -a -G sudo zimit \ diff --git a/config.yaml b/config.yaml index 2250640..a072f5a 100644 --- a/config.yaml +++ b/config.yaml @@ -9,9 +9,9 @@ proxy: recorder: live -autoindex: 10 +#autoindex: 10 -enable_auto_fetch: true +#enable_auto_fetch: true collections: live: $live diff --git a/index.js b/index.js index d43eddd..812f6ae 100644 --- a/index.js +++ b/index.js @@ -1,4 +1,4 @@ -const puppeteer = require("puppeteer-core"); +const puppeteer = require("puppeteer"); const { Cluster } = require("puppeteer-cluster"); const child_process = require("child_process"); const fetch = require("node-fetch"); @@ -20,14 +20,14 @@ async function run(params) { // Puppeter Options const puppeteerOptions = { headless: true, - executablePath: "/usr/bin/google-chrome", + //executablePath: "/usr/bin/google-chrome", ignoreHTTPSErrors: true, args }; // Puppeteer Cluster init and options const cluster = await Cluster.launch({ - concurrency: Cluster.CONCURRENCY_CONTEXT, + concurrency: Cluster.CONCURRENCY_PAGE, maxConcurrency: Number(params.workers) || 1, skipDuplicateUrls: true, puppeteerOptions, diff --git a/package.json b/package.json index c304660..47efcbb 100644 --- a/package.json +++ b/package.json @@ -8,8 +8,8 @@ "dependencies": { "abort-controller": "^3.0.0", "node-fetch": "^2.6.1", + "puppeteer": "^5.3.0", "puppeteer-cluster": "^0.22.0", - "puppeteer-core": "^5.3.0", "yargs": "^16.0.3" } } diff --git a/uwsgi.ini b/uwsgi.ini index f415aa3..1ded14e 100644 --- a/uwsgi.ini +++ b/uwsgi.ini @@ -12,13 +12,14 @@ if-env = VIRTUAL_ENV venv = $(VIRTUAL_ENV) endif = -gevent = 100 +gevent = 200 #Not available until uwsgi 2.1 #monkey-patching manually in pywb.apps.wayback #gevent-early-monkey-patch = # for uwsgi<2.1, set env when using gevent env = GEVENT_MONKEY_PATCH=1 +processes = 8 # specify config file here env = PYWB_CONFIG_FILE=config.yaml diff --git a/yarn.lock b/yarn.lock index d793a9a..458e423 100644 --- a/yarn.lock +++ b/yarn.lock @@ -347,10 +347,10 @@ puppeteer-cluster@^0.22.0: dependencies: debug "^4.1.1" -puppeteer-core@^5.3.0: +puppeteer@^5.3.0: version "5.3.0" - resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-5.3.0.tgz#36d2e10132632c9cb73007f3f2880f4e6b655977" - integrity sha512-+4wk+0dcDNg7AQqN41Q9r41U6iltAtknuVBI0aj0O/Vp8/4orgbFV0wn55wV5xRae//CucLPUnaczxZx7dz0UA== + resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-5.3.0.tgz#0abf83d0f2d1273baf2b56885a813f8052903e33" + integrity sha512-GjqMk5GRro3TO0sw3QMsF1H7n+/jaK2OW45qMvqjYUyJ7y4oA//9auy969HHhTG3HZXaMxY/NWXF/NXlAFIvtw== dependencies: debug "^4.1.0" devtools-protocol "0.0.799653"