From c228c8300cca13dd09bda5edbda7482b48c44e81 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 2 Nov 2020 15:36:28 +0000 Subject: [PATCH] split zimit from core browsertrix-crawler, which has been moved to https://github.com/webrecorder/browsertrix-crawler use versioned browsertrix-crawler:0.1.0 image part of #45 --- .gitignore | 1 + Dockerfile | 34 +--- README.md | 12 +- autoplay.js | 89 --------- config.yaml | 21 --- crawler.js | 436 -------------------------------------------- package.json | 15 -- uwsgi.ini | 27 --- yarn.lock | 498 --------------------------------------------------- zimit.py | 26 +-- 10 files changed, 14 insertions(+), 1145 deletions(-) delete mode 100644 autoplay.js delete mode 100644 config.yaml delete mode 100644 crawler.js delete mode 100644 package.json delete mode 100644 uwsgi.ini delete mode 100644 yarn.lock diff --git a/.gitignore b/.gitignore index cbbc2e5..7da9385 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ __pycache__ *.egg-info/ collections/ node_modules/ +output/ diff --git a/Dockerfile b/Dockerfile index 013a42c..82e0bfa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,40 +1,10 @@ -FROM oldwebtoday/chrome:84 as chrome - -FROM nikolaik/python-nodejs:python3.8-nodejs14 - -RUN apt-get update -y \ - && apt-get install --no-install-recommends -qqy fonts-stix locales-all redis-server \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -ENV PROXY_HOST=localhost \ - PROXY_PORT=8080 \ - PROXY_CA_URL=http://wsgiprox/download/pem \ - PROXY_CA_FILE=/tmp/proxy-ca.pem \ - NO_SOCAT=1 - -RUN pip install gevent>=20.9.0 uwsgi - -#RUN pip install git+https://github.com/openzim/warc2zim@fuzzy-match -RUN pip install 'warc2zim>=1.3.0' - -RUN pip install git+https://github.com/webrecorder/pywb@patch-work - -COPY --from=chrome /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/ -COPY --from=chrome /lib/x86_64-linux-gnu/libdbus* /lib/x86_64-linux-gnu/ -COPY --from=chrome /opt/google/chrome/ /opt/google/chrome/ +FROM webrecorder/browsertrix-crawler:0.1.0 WORKDIR /app -ADD package.json /app/ +RUN pip install 'warc2zim>=1.3.0' -RUN yarn install - -ADD config.yaml /app/ -ADD uwsgi.ini /app/ ADD zimit.py /app/ -ADD crawler.js /app/ -ADD autoplay.js /app/ RUN ln -s /app/zimit.py /usr/bin/zimit diff --git a/README.md b/README.md index e9ccf7c..6d76f00 100644 --- a/README.md +++ b/README.md @@ -12,17 +12,16 @@ Technical background This version of Zimit runs a single-site headless-Chrome based crawl in a Docker container and produces a ZIM of the crawled content. -The system uses: - - `oldwebtoday/chrome` - to install a recent version of Chrome 84 - - `puppeteer-cluster` - for running Chrome browsers in parallel - - `pywb` - in recording mode for capturing the content - - `warc2zim` - to convert the crawled WARC files into a ZIM +The system extends the crawling system in [Browsertrix Crawler](https://github.com/webrecorder/browsertrix-crawler) and converts +the crawled WARC files to ZIM using [warc2zim](https://github.com/openzim/warc2zim) -The driver in `index.js` crawls a given URL using puppeteer-cluster. +The `zimit.py` is the entrypoint for the system. After the crawl is done, warc2zim is used to write a zim to the `/output` directory, which can be mounted as a volume. +Using the `--keep` flag, the crawled WARCs will also be kept in a temp directory inside `/output` + Usage ----- @@ -44,6 +43,7 @@ The image accepts the following parameters: - `--limit U` - Limit capture to at most U URLs - `--exclude ` - skip URLs that match the regex from crawling. Can be specified multiple times. - `--scroll [N]` - if set, will activate a simple auto-scroll behavior on each page to scroll for upto N seconds +- `--keep` - if set, keep the WARC files in a temp directory inside the output directory The following is an example usage. The `--cap-add` and `--shm-size` flags are [needed to run Chrome in Docker](https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips). diff --git a/autoplay.js b/autoplay.js deleted file mode 100644 index d36b958..0000000 --- a/autoplay.js +++ /dev/null @@ -1,89 +0,0 @@ -(() => { - - function run() { - if (window.navigator.__crawler_autoplay) { - return; - } - - //console.log("checking autoplay for " + document.location.href); - window.navigator.__crawler_autoplay = true; - - const specialActions = [ - { - rx: /w\.soundcloud\.com/, - check(url) { - const autoplay = url.searchParams.get('auto_play'); - return autoplay === 'true'; - }, - handle(url) { - url.searchParams.set('auto_play', 'true'); - // set continuous_play to true in order to handle - // a playlist etc - url.searchParams.set('continuous_play', 'true'); - self.location.href = url.href; - }, - }, - { - rx: [/player\.vimeo\.com/, /youtube\.com\/embed\//], - check(url) { - const autoplay = url.searchParams.get('autoplay'); - return autoplay === '1'; - }, - handle(url) { - url.searchParams.set('autoplay', '1'); - if (window.__crawler_autoplayLoad) { - window.__crawler_autoplayLoad(url.href); - } - self.location.href = url.href; - }, - }, - ]; - const url = new URL(self.location.href); - for (let i = 0; i < specialActions.length; i++) { - if (Array.isArray(specialActions[i].rx)) { - const rxs = specialActions[i].rx; - for (let j = 0; j < rxs.length; j++) { - if (url.href.search(rxs[j]) >= 0) { - if (specialActions[i].check(url)) return; - return specialActions[i].handle(url); - } - } - } else if (url.href.search(specialActions[i].rx) >= 0) { - if (specialActions[i].check(url)) return; - return specialActions[i].handle(url); - } - } - } - - document.addEventListener("readystatechange", run); - - if (document.readyState === "complete") { - run(); - } - - - const mediaSet = new Set(); - - setInterval(() => { - const medias = document.querySelectorAll("video, audio"); - - for (const media of medias) { - try { - if (media.src && !mediaSet.has(media.src)) { - if (window.__crawler_queueUrls && (media.src.startsWith("http:") || media.src.startsWith("https:"))) { - window.__crawler_queueUrls(media.src); - } - mediaSet.add(media.src); - } else if (!media.src) { - media.play(); - } - } catch(e) { - console.log(e); - } - } - }, 3000); - - - -})(); - diff --git a/config.yaml b/config.yaml deleted file mode 100644 index f9d3943..0000000 --- a/config.yaml +++ /dev/null @@ -1,21 +0,0 @@ -debug: true - - -proxy: - coll: capture - recording: true - - enable_banner: false - enable_wombat: true - -recorder: - dedup_policy: skip - source_coll: live - cache: always - -#autoindex: 10 - -enable_auto_fetch: true - -collections: - live: $live diff --git a/crawler.js b/crawler.js deleted file mode 100644 index 4808f4c..0000000 --- a/crawler.js +++ /dev/null @@ -1,436 +0,0 @@ -const fs = require("fs"); -const puppeteer = require("puppeteer-core"); -const { Cluster } = require("puppeteer-cluster"); -const child_process = require("child_process"); -const fetch = require("node-fetch"); -const AbortController = require("abort-controller"); - -const HTML_TYPES = ["text/html", "application/xhtml", "application/xhtml+xml"]; -const WAIT_UNTIL_OPTS = ["load", "domcontentloaded", "networkidle0", "networkidle2"]; -const NEW_CONTEXT_OPTS = ["page", "session", "browser"]; -const CHROME_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"; - -// to ignore HTTPS error for HEAD check -const HTTPS_AGENT = require("https").Agent({ - rejectUnauthorized: false, -}); - -process.once('SIGINT', (code) => { - console.log('SIGINT received, exiting'); - process.exit(1); -}); - -process.once('SIGTERM', (code) => { - console.log('SIGTERM received, exiting'); - process.exit(1); -}); - - -const autoplayScript = fs.readFileSync("./autoplay.js", "utf-8"); - - -// prefix for direct capture via pywb -const capturePrefix = `http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}/capture/record/id_/`; -const headers = {"User-Agent": CHROME_USER_AGENT}; - - -async function run(params) { - // Chrome Flags, including proxy server - const args = [ - "--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically) - `--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`, - "--no-sandbox", - "--disable-background-media-suspend", - "--autoplay-policy=no-user-gesture-required", - ]; - - // Puppeter Options - const puppeteerOptions = { - headless: true, - executablePath: "/opt/google/chrome/google-chrome", - ignoreHTTPSErrors: true, - args - }; - - // params - const { url, waitUntil, timeout, scope, limit, exclude, scroll, newContext } = params; - - let concurrency = Cluster.CONCURRENCY_PAGE; - - switch (newContext) { - case "page": - concurrency = Cluster.CONCURRENCY_PAGE; - break; - - case "session": - concurrency = Cluster.CONCURRENCY_CONTEXT; - break; - - case "browser": - concurrency = Cluster.CONCURRENCY_BROWSER; - break; - } - - // Puppeteer Cluster init and options - const cluster = await Cluster.launch({ - concurrency, - maxConcurrency: Number(params.workers) || 1, - skipDuplicateUrls: true, - // total timeout for cluster - timeout: timeout * 2, - puppeteerOptions, - puppeteer, - monitor: true - }); - - // Maintain own seen list - const seenList = new Set(); - - //console.log("Limit: " + limit); - - // links crawled counter - let numLinks = 0; - - // Crawl Task - cluster.task(async ({page, data}) => { - const {url} = data; - - if (!await htmlCheck(url, capturePrefix)) { - return; - } - - //page.on('console', message => console.log(`${message.type()} ${message.text()}`)); - //page.on('pageerror', message => console.warn(message)); - //page.on('error', message => console.warn(message)); - //page.on('requestfailed', message => console.warn(message._failureText)); - const mediaResults = []; - - await page.exposeFunction('__crawler_queueUrls', (url) => { - mediaResults.push(directCapture(url)); - }); - - let waitForVideo = false; - - await page.exposeFunction('__crawler_autoplayLoad', (url) => { - console.log("*** Loading autoplay URL: " + url); - waitForVideo = true; - }); - - try { - await page.evaluateOnNewDocument(autoplayScript); - } catch(e) { - console.log(e); - } - - try { - await page.goto(url, {waitUntil, timeout}); - } catch (e) { - console.log(`Load timeout for ${url}`); - } - - try { - await Promise.all(mediaResults); - } catch (e) { - console.log(`Error loading media URLs`, e); - } - - if (waitForVideo) { - console.log("Extra wait 15s for video loading"); - await sleep(15000); - } - - if (scroll) { - try { - await Promise.race([page.evaluate(autoScroll), sleep(30000)]); - } catch (e) { - console.warn("Behavior Failed", e); - } - } - - let results = null; - - try { - results = await page.evaluate(() => { - return [...document.querySelectorAll('a[href]')].map(el => ({ url: el.href})) - }); - } catch (e) { - console.warn("Link Extraction failed", e); - return; - } - - try { - for (data of results) { - const newUrl = shouldCrawl(scope, seenList, data.url, exclude); - - if (newUrl) { - seenList.add(newUrl); - if (numLinks++ >= limit && limit > 0) { - break; - } - cluster.queue({url: newUrl}); - } - } - } catch (e) { - console.log("Queuing Error: " + e); - } - }); - - numLinks++; - cluster.queue({url}); - - await cluster.idle(); - await cluster.close(); - - // extra wait for all resources to land into WARCs - console.log("Waiting 30s to ensure WARCs are finished"); - await sleep(30000); -} - - -function shouldCrawl(scope, seenList, url, exclude) { - try { - url = new URL(url); - } catch(e) { - return false; - } - - // remove hashtag - url.hash = ""; - - // only queue http/https URLs - if (url.protocol != "http:" && url.protocol != "https:") { - return false; - } - - url = url.href; - - // skip already crawled - if (seenList.has(url)) { - return false; - } - - let inScope = false; - - // check scopes - for (const s of scope) { - if (s.exec(url)) { - inScope = true; - break; - } - } - - if (!inScope) { - //console.log(`Not in scope ${url} ${scope}`); - return false; - } - - // check exclusions - for (const e of exclude) { - if (e.exec(url)) { - //console.log(`Skipping ${url} excluded by ${e}`); - return false; - } - } - - return url; -} - -async function htmlCheck(url, capturePrefix) { - try { - const agent = url.startsWith("https:") ? HTTPS_AGENT : null; - - const resp = await fetch(url, {method: "HEAD", headers, agent}); - - if (resp.status >= 400) { - console.log(`Skipping ${url}, invalid status ${resp.status}`); - return false; - } - - const contentType = resp.headers.get("Content-Type"); - - // just load if no content-type - if (!contentType) { - return true; - } - - const mime = contentType.split(";")[0]; - - if (HTML_TYPES.includes(mime)) { - return true; - } - - // capture directly - await directCapture(url); - - return false; - } catch(e) { - console.log("HTML Check error", e); - // can't confirm not html, so try in browser - return true; - } -} - -async function directCapture(url) { - console.log(`Direct capture: ${capturePrefix}${url}`); - const abort = new AbortController(); - const signal = abort.signal; - const resp2 = await fetch(capturePrefix + url, {signal, headers}); - abort.abort(); -} - - - -async function autoScroll() { - const canScrollMore = () => - self.scrollY + self.innerHeight < - Math.max( - self.document.body.scrollHeight, - self.document.body.offsetHeight, - self.document.documentElement.clientHeight, - self.document.documentElement.scrollHeight, - self.document.documentElement.offsetHeight - ); - - const scrollOpts = { top: 250, left: 0, behavior: 'auto' }; - - while (canScrollMore()) { - self.scrollBy(scrollOpts); - await new Promise(resolve => setTimeout(resolve, 500)); - } -} - -function sleep(time) { - return new Promise(resolve => setTimeout(resolve, time)); -} - - -async function main() { - const params = require('yargs') - .usage("browsertrix-mini [options]") - .options({ - "url": { - alias: "u", - describe: "The URL to start crawling from", - demandOption: true, - type: "string", - }, - - "workers": { - alias: "w", - describe: "The number of workers to run in parallel", - demandOption: false, - default: 1, - type: "number", - }, - - "newContext": { - describe: "The context for each new capture, can be a new: page, session or browser.", - default: "page", - type: "string" - }, - - "waitUntil": { - describe: "Puppeteer page.goto() condition to wait for before continuing", - default: "load", - }, - - "limit": { - describe: "Limit crawl to this number of pages", - default: 0, - type: "number", - }, - - "timeout": { - describe: "Timeout for each page to load (in seconds)", - default: 90, - type: "number", - }, - - "scope": { - describe: "Regex of page URLs that should be included in the crawl (defaults to the immediate directory of URL)", - }, - - "exclude": { - describe: "Regex of page URLs that should be excluded from the crawl." - }, - - "scroll": { - describe: "If set, will autoscroll to bottom of the page", - type: "boolean", - default: false, - - }}).check((argv, option) => { - // Scope for crawl, default to the domain of the URL - const url = new URL(argv.url); - - if (url.protocol !== "http:" && url.protocol != "https:") { - throw new Error("URL must start with http:// or https://"); - } - - // ensure valid url is used (adds trailing slash if missing) - argv.url = url.href; - - if (!argv.scope) { - //argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1); - argv.scope = [new RegExp("^" + rxEscape(url.href.slice(0, url.href.lastIndexOf("/") + 1)))]; - } - - argv.timeout *= 1000; - - // waitUntil condition must be: load, domcontentloaded, networkidle0, networkidle2 - // (see: https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options) - if (!WAIT_UNTIL_OPTS.includes(argv.waitUntil)) { - throw new Error("Invalid waitUntil, must be one of: " + WAIT_UNTIL_OPTS.join(",")); - } - - if (!NEW_CONTEXT_OPTS.includes(argv.newContext)) { - throw new Error("Invalid newContext, must be one of: " + NEW_CONTEXT_OPTS.join(",")); - } - - // Support one or multiple exclude - if (argv.exclude) { - if (typeof(argv.exclude) === "string") { - argv.exclude = [new RegExp(argv.exclude)]; - } else { - argv.exclude = argv.exclude.map(e => new RegExp(e)); - } - } else { - argv.exclude = []; - } - - // Support one or multiple scopes - if (argv.scope) { - if (typeof(argv.scope) === "string") { - argv.scope = [new RegExp(argv.scope)]; - } else { - argv.scope = argv.scope.map(e => new RegExp(e)); - } - } else { - argv.scope = []; - } - - return true; - }) - .argv; - - console.log("Exclusions Regexes: ", params.exclude); - console.log("Scope Regexes: ", params.scope); - - try { - await run(params); - process.exit(0); - } catch(e) { - console.error("Crawl failed"); - console.error(e); - process.exit(1); - } -} - -function rxEscape(string) { - return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); -} - - -main(); - - diff --git a/package.json b/package.json deleted file mode 100644 index 3820178..0000000 --- a/package.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "name": "zimit-crawler", - "version": "1.0.0", - "main": "zimit-crawler", - "repository": "https://github.com/openzim/zimit", - "author": "Ilya Kreymer ", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "node-fetch": "^2.6.1", - "puppeteer-cluster": "^0.22.0", - "puppeteer-core": "^5.3.1", - "yargs": "^16.0.3" - } -} diff --git a/uwsgi.ini b/uwsgi.ini deleted file mode 100644 index 0f46569..0000000 --- a/uwsgi.ini +++ /dev/null @@ -1,27 +0,0 @@ -[uwsgi] -if-not-env = PORT -http-socket = :8080 -socket = :8081 -endif = - -master = true -buffer-size = 65536 -die-on-term = true - -if-env = VIRTUAL_ENV -venv = $(VIRTUAL_ENV) -endif = - -gevent = 200 - -#Not available until uwsgi 2.1 -#monkey-patching manually in pywb.apps.wayback -#gevent-early-monkey-patch = -# for uwsgi<2.1, set env when using gevent -env = GEVENT_MONKEY_PATCH=1 -processes = 8 - -# specify config file here -env = PYWB_CONFIG_FILE=/app/config.yaml -wsgi = pywb.apps.wayback - diff --git a/yarn.lock b/yarn.lock deleted file mode 100644 index 91ceab7..0000000 --- a/yarn.lock +++ /dev/null @@ -1,498 +0,0 @@ -# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. -# yarn lockfile v1 - - -"@types/color-name@^1.1.1": - version "1.1.1" - resolved "https://registry.yarnpkg.com/@types/color-name/-/color-name-1.1.1.tgz#1c1261bbeaa10a8055bbc5d8ab84b7b2afc846a0" - integrity sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ== - -"@types/node@*": - version "14.14.0" - resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.0.tgz#f1091b6ad5de18e8e91bdbd43ec63f13de372538" - integrity sha512-BfbIHP9IapdupGhq/hc+jT5dyiBVZ2DdeC5WwJWQWDb0GijQlzUFAeIQn/2GtvZcd2HVUU7An8felIICFTC2qg== - -"@types/yauzl@^2.9.1": - version "2.9.1" - resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.9.1.tgz#d10f69f9f522eef3cf98e30afb684a1e1ec923af" - integrity sha512-A1b8SU4D10uoPjwb0lnHmmu8wZhR9d+9o2PKBQT2jU5YPTKsxac6M2qGAdY7VcL+dHHhARVUDmeg0rOrcd9EjA== - dependencies: - "@types/node" "*" - -abort-controller@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/abort-controller/-/abort-controller-3.0.0.tgz#eaf54d53b62bae4138e809ca225c8439a6efb392" - integrity sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg== - dependencies: - event-target-shim "^5.0.0" - -agent-base@5: - version "5.1.1" - resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-5.1.1.tgz#e8fb3f242959db44d63be665db7a8e739537a32c" - integrity sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g== - -ansi-regex@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.0.tgz#388539f55179bf39339c81af30a654d69f87cb75" - integrity sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg== - -ansi-styles@^4.0.0: - version "4.2.1" - resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.2.1.tgz#90ae75c424d008d2624c5bf29ead3177ebfcf359" - integrity sha512-9VGjrMsG1vePxcSweQsN20KY/c4zN0h9fLjqAbwbPfahM3t+NL+M9HC8xeXG2I8pX5NoamTGNuomEUFI7fcUjA== - dependencies: - "@types/color-name" "^1.1.1" - color-convert "^2.0.1" - -balanced-match@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767" - integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c= - -base64-js@^1.0.2: - version "1.3.1" - resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.3.1.tgz#58ece8cb75dd07e71ed08c736abc5fac4dbf8df1" - integrity sha512-mLQ4i2QO1ytvGWFWmcngKO//JXAQueZvwEKtjgQFM4jIK0kU+ytMfplL8j+n5mspOfjHwoAg+9yhb7BwAHm36g== - -bl@^4.0.3: - version "4.0.3" - resolved "https://registry.yarnpkg.com/bl/-/bl-4.0.3.tgz#12d6287adc29080e22a705e5764b2a9522cdc489" - integrity sha512-fs4G6/Hu4/EE+F75J8DuN/0IpQqNjAdC7aEQv7Qt8MHGUH7Ckv2MwTEEeN9QehD0pfIDkMI1bkHYkKy7xHyKIg== - dependencies: - buffer "^5.5.0" - inherits "^2.0.4" - readable-stream "^3.4.0" - -brace-expansion@^1.1.7: - version "1.1.11" - resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" - integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== - dependencies: - balanced-match "^1.0.0" - concat-map "0.0.1" - -buffer-crc32@~0.2.3: - version "0.2.13" - resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242" - integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI= - -buffer@^5.2.1, buffer@^5.5.0: - version "5.6.0" - resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.6.0.tgz#a31749dc7d81d84db08abf937b6b8c4033f62786" - integrity sha512-/gDYp/UtU0eA1ys8bOs9J6a+E/KWIY+DZ+Q2WESNUA0jFRsJOc0SNUO6xJ5SGA1xueg3NL65W6s+NY5l9cunuw== - dependencies: - base64-js "^1.0.2" - ieee754 "^1.1.4" - -chownr@^1.1.1: - version "1.1.4" - resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b" - integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg== - -cliui@^7.0.0: - version "7.0.1" - resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.1.tgz#a4cb67aad45cd83d8d05128fc9f4d8fbb887e6b3" - integrity sha512-rcvHOWyGyid6I1WjT/3NatKj2kDt9OdSHSXpyLXaMWFbKpGACNW8pRhhdPUq9MWUOdwn8Rz9AVETjF4105rZZQ== - dependencies: - string-width "^4.2.0" - strip-ansi "^6.0.0" - wrap-ansi "^7.0.0" - -color-convert@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" - integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== - dependencies: - color-name "~1.1.4" - -color-name@~1.1.4: - version "1.1.4" - resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" - integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== - -concat-map@0.0.1: - version "0.0.1" - resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" - integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s= - -debug@4, debug@^4.1.0, debug@^4.1.1: - version "4.2.0" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.2.0.tgz#7f150f93920e94c58f5574c2fd01a3110effe7f1" - integrity sha512-IX2ncY78vDTjZMFUdmsvIRFY2Cf4FnD0wRs+nQwJU8Lu99/tPFdb0VybiiMTPe3I6rQmwsqQqRBvxU+bZ/I8sg== - dependencies: - ms "2.1.2" - -devtools-protocol@0.0.799653: - version "0.0.799653" - resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.799653.tgz#86fc95ce5bf4fdf4b77a58047ba9d2301078f119" - integrity sha512-t1CcaZbvm8pOlikqrsIM9GOa7Ipp07+4h/q9u0JXBWjPCjHdBl9KkddX87Vv9vBHoBGtwV79sYQNGnQM6iS5gg== - -emoji-regex@^8.0.0: - version "8.0.0" - resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" - integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== - -end-of-stream@^1.1.0, end-of-stream@^1.4.1: - version "1.4.4" - resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" - integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q== - dependencies: - once "^1.4.0" - -escalade@^3.0.2: - version "3.1.0" - resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.0.tgz#e8e2d7c7a8b76f6ee64c2181d6b8151441602d4e" - integrity sha512-mAk+hPSO8fLDkhV7V0dXazH5pDc6MrjBTPyD3VeKzxnVFjH1MIxbCdqGZB9O8+EwWakZs3ZCbDS4IpRt79V1ig== - -event-target-shim@^5.0.0: - version "5.0.1" - resolved "https://registry.yarnpkg.com/event-target-shim/-/event-target-shim-5.0.1.tgz#5d4d3ebdf9583d63a5333ce2deb7480ab2b05789" - integrity sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ== - -extract-zip@^2.0.0: - version "2.0.1" - resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a" - integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg== - dependencies: - debug "^4.1.1" - get-stream "^5.1.0" - yauzl "^2.10.0" - optionalDependencies: - "@types/yauzl" "^2.9.1" - -fd-slicer@~1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e" - integrity sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4= - dependencies: - pend "~1.2.0" - -find-up@^4.0.0: - version "4.1.0" - resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19" - integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw== - dependencies: - locate-path "^5.0.0" - path-exists "^4.0.0" - -fs-constants@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad" - integrity sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow== - -fs.realpath@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" - integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8= - -get-caller-file@^2.0.5: - version "2.0.5" - resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e" - integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== - -get-stream@^5.1.0: - version "5.2.0" - resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3" - integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA== - dependencies: - pump "^3.0.0" - -glob@^7.1.3: - version "7.1.6" - resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6" - integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA== - dependencies: - fs.realpath "^1.0.0" - inflight "^1.0.4" - inherits "2" - minimatch "^3.0.4" - once "^1.3.0" - path-is-absolute "^1.0.0" - -https-proxy-agent@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz#702b71fb5520a132a66de1f67541d9e62154d82b" - integrity sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg== - dependencies: - agent-base "5" - debug "4" - -ieee754@^1.1.4: - version "1.1.13" - resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.1.13.tgz#ec168558e95aa181fd87d37f55c32bbcb6708b84" - integrity sha512-4vf7I2LYV/HaWerSo3XmlMkp5eZ83i+/CDluXi/IGTs/O1sejBNhTtnxzmRZfvOUqj7lZjqHkeTvpgSFDlWZTg== - -inflight@^1.0.4: - version "1.0.6" - resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" - integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk= - dependencies: - once "^1.3.0" - wrappy "1" - -inherits@2, inherits@^2.0.3, inherits@^2.0.4: - version "2.0.4" - resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" - integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== - -is-fullwidth-code-point@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" - integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== - -locate-path@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0" - integrity sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g== - dependencies: - p-locate "^4.1.0" - -minimatch@^3.0.4: - version "3.0.4" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083" - integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA== - dependencies: - brace-expansion "^1.1.7" - -mkdirp-classic@^0.5.2: - version "0.5.3" - resolved "https://registry.yarnpkg.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz#fa10c9115cc6d8865be221ba47ee9bed78601113" - integrity sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A== - -ms@2.1.2: - version "2.1.2" - resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" - integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== - -node-fetch@^2.6.1: - version "2.6.1" - resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052" - integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw== - -once@^1.3.0, once@^1.3.1, once@^1.4.0: - version "1.4.0" - resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" - integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E= - dependencies: - wrappy "1" - -p-limit@^2.2.0: - version "2.3.0" - resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1" - integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w== - dependencies: - p-try "^2.0.0" - -p-locate@^4.1.0: - version "4.1.0" - resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07" - integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A== - dependencies: - p-limit "^2.2.0" - -p-try@^2.0.0: - version "2.2.0" - resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6" - integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ== - -path-exists@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" - integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== - -path-is-absolute@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" - integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18= - -pend@~1.2.0: - version "1.2.0" - resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" - integrity sha1-elfrVQpng/kRUzH89GY9XI4AelA= - -pkg-dir@^4.2.0: - version "4.2.0" - resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3" - integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ== - dependencies: - find-up "^4.0.0" - -progress@^2.0.1: - version "2.0.3" - resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" - integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== - -proxy-from-env@^1.0.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2" - integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg== - -pump@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64" - integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww== - dependencies: - end-of-stream "^1.1.0" - once "^1.3.1" - -puppeteer-cluster@^0.22.0: - version "0.22.0" - resolved "https://registry.yarnpkg.com/puppeteer-cluster/-/puppeteer-cluster-0.22.0.tgz#4ab214671f414f15ad6a94a4b61ed0b4172e86e6" - integrity sha512-hmydtMwfVM+idFIDzS8OXetnujHGre7RY3BGL+3njy9+r8Dcu3VALkZHfuBEPf6byKssTCgzxU1BvLczifXd5w== - dependencies: - debug "^4.1.1" - -puppeteer-core@^5.3.1: - version "5.3.1" - resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-5.3.1.tgz#1affb1738afac499416a7fd4ed2ed0c18577e88f" - integrity sha512-YE6c6FvHAFKQUyNTqFs78SgGmpcqOPhhmVfEVNYB4abv7bV2V+B3r72T3e7vlJkEeTloy4x9bQLrGbHHoKSg1w== - dependencies: - debug "^4.1.0" - devtools-protocol "0.0.799653" - extract-zip "^2.0.0" - https-proxy-agent "^4.0.0" - pkg-dir "^4.2.0" - progress "^2.0.1" - proxy-from-env "^1.0.0" - rimraf "^3.0.2" - tar-fs "^2.0.0" - unbzip2-stream "^1.3.3" - ws "^7.2.3" - -readable-stream@^3.1.1, readable-stream@^3.4.0: - version "3.6.0" - resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198" - integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA== - dependencies: - inherits "^2.0.3" - string_decoder "^1.1.1" - util-deprecate "^1.0.1" - -require-directory@^2.1.1: - version "2.1.1" - resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" - integrity sha1-jGStX9MNqxyXbiNE/+f3kqam30I= - -rimraf@^3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" - integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== - dependencies: - glob "^7.1.3" - -safe-buffer@~5.2.0: - version "5.2.1" - resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" - integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== - -string-width@^4.1.0, string-width@^4.2.0: - version "4.2.0" - resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.0.tgz#952182c46cc7b2c313d1596e623992bd163b72b5" - integrity sha512-zUz5JD+tgqtuDjMhwIg5uFVV3dtqZ9yQJlZVfq4I01/K5Paj5UHj7VyrQOJvzawSVlKpObApbfD0Ed6yJc+1eg== - dependencies: - emoji-regex "^8.0.0" - is-fullwidth-code-point "^3.0.0" - strip-ansi "^6.0.0" - -string_decoder@^1.1.1: - version "1.3.0" - resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e" - integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== - dependencies: - safe-buffer "~5.2.0" - -strip-ansi@^6.0.0: - version "6.0.0" - resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.0.tgz#0b1571dd7669ccd4f3e06e14ef1eed26225ae532" - integrity sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w== - dependencies: - ansi-regex "^5.0.0" - -tar-fs@^2.0.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.0.tgz#d1cdd121ab465ee0eb9ccde2d35049d3f3daf0d5" - integrity sha512-9uW5iDvrIMCVpvasdFHW0wJPez0K4JnMZtsuIeDI7HyMGJNxmDZDOCQROr7lXyS+iL/QMpj07qcjGYTSdRFXUg== - dependencies: - chownr "^1.1.1" - mkdirp-classic "^0.5.2" - pump "^3.0.0" - tar-stream "^2.0.0" - -tar-stream@^2.0.0: - version "2.1.4" - resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.1.4.tgz#c4fb1a11eb0da29b893a5b25476397ba2d053bfa" - integrity sha512-o3pS2zlG4gxr67GmFYBLlq+dM8gyRGUOvsrHclSkvtVtQbjV0s/+ZE8OpICbaj8clrX3tjeHngYGP7rweaBnuw== - dependencies: - bl "^4.0.3" - end-of-stream "^1.4.1" - fs-constants "^1.0.0" - inherits "^2.0.3" - readable-stream "^3.1.1" - -through@^2.3.8: - version "2.3.8" - resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5" - integrity sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU= - -unbzip2-stream@^1.3.3: - version "1.4.3" - resolved "https://registry.yarnpkg.com/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz#b0da04c4371311df771cdc215e87f2130991ace7" - integrity sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg== - dependencies: - buffer "^5.2.1" - through "^2.3.8" - -util-deprecate@^1.0.1: - version "1.0.2" - resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" - integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8= - -wrap-ansi@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" - integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== - dependencies: - ansi-styles "^4.0.0" - string-width "^4.1.0" - strip-ansi "^6.0.0" - -wrappy@1: - version "1.0.2" - resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" - integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8= - -ws@^7.2.3: - version "7.3.1" - resolved "https://registry.yarnpkg.com/ws/-/ws-7.3.1.tgz#d0547bf67f7ce4f12a72dfe31262c68d7dc551c8" - integrity sha512-D3RuNkynyHmEJIpD2qrgVkc9DQ23OrN/moAwZX4L8DfvszsJxpjQuUq3LMx6HoYji9fbIOBY18XWBsAux1ZZUA== - -y18n@^5.0.1: - version "5.0.1" - resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.1.tgz#1ad2a7eddfa8bce7caa2e1f6b5da96c39d99d571" - integrity sha512-/jJ831jEs4vGDbYPQp4yGKDYPSCCEQ45uZWJHE1AoYBzqdZi8+LDWas0z4HrmJXmKdpFsTiowSHXdxyFhpmdMg== - -yargs-parser@^20.0.0: - version "20.0.0" - resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.0.0.tgz#c65a1daaa977ad63cebdd52159147b789a4e19a9" - integrity sha512-8eblPHTL7ZWRkyjIZJjnGf+TijiKJSwA24svzLRVvtgoi/RZiKa9fFQTrlx0OKLnyHSdt/enrdadji6WFfESVA== - -yargs@^16.0.3: - version "16.0.3" - resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.0.3.tgz#7a919b9e43c90f80d4a142a89795e85399a7e54c" - integrity sha512-6+nLw8xa9uK1BOEOykaiYAJVh6/CjxWXK/q9b5FpRgNslt8s22F2xMBqVIKgCRjNgGvGPBy8Vog7WN7yh4amtA== - dependencies: - cliui "^7.0.0" - escalade "^3.0.2" - get-caller-file "^2.0.5" - require-directory "^2.1.1" - string-width "^4.2.0" - y18n "^5.0.1" - yargs-parser "^20.0.0" - -yauzl@^2.10.0: - version "2.10.0" - resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" - integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk= - dependencies: - buffer-crc32 "~0.2.3" - fd-slicer "~1.1.0" diff --git a/zimit.py b/zimit.py index 9272d5d..ffa0e2e 100755 --- a/zimit.py +++ b/zimit.py @@ -114,31 +114,15 @@ def zimit(args=None): atexit.register(cleanup) - # create pywb collection - print("") - print("----------") - print("pywb init") - subprocess.run( - ["/usr/bin/env", "wb-manager", "init", "capture"], check=True, cwd=temp_root_dir - ) # nosec - - subprocess.Popen( - ["/usr/bin/env", "redis-server"], cwd=temp_root_dir, stdout=subprocess.DEVNULL - ) # nosec - - subprocess.Popen( - ["/usr/bin/env", "uwsgi", os.getcwd() + "/uwsgi.ini"], - cwd=temp_root_dir, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) # nosec - cmd_args = get_node_cmd_line(zimit_args) + cmd_args.append("--cwd") + cmd_args.append(str(temp_root_dir)) + cmd_line = " ".join(cmd_args) print("") print("----------") - print("running zimit driver: " + cmd_line) + print("running browsertrix-crawler crawl: " + cmd_line) subprocess.run(cmd_args, check=True) warc_files = temp_root_dir / "collections" / "capture" / "archive" @@ -153,7 +137,7 @@ def zimit(args=None): return warc2zim(warc2zim_args) def get_node_cmd_line(args): - node_cmd = ["/usr/bin/env", "node", "crawler.js"] + node_cmd = ["crawl"] for arg in [ "url", "workers",