diff --git a/Dockerfile b/Dockerfile
index faf9bea..013a42c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,12 +15,14 @@ ENV PROXY_HOST=localhost \
 
 RUN pip install gevent>=20.9.0 uwsgi
 
-RUN pip install warc2zim==1.2.0
+#RUN pip install git+https://github.com/openzim/warc2zim@fuzzy-match
+RUN pip install 'warc2zim>=1.3.0'
 
 RUN pip install git+https://github.com/webrecorder/pywb@patch-work
 
 COPY --from=chrome /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/
 COPY --from=chrome /lib/x86_64-linux-gnu/libdbus* /lib/x86_64-linux-gnu/
+COPY --from=chrome /opt/google/chrome/ /opt/google/chrome/
 
 WORKDIR /app
 
@@ -32,6 +34,8 @@ ADD config.yaml /app/
 ADD uwsgi.ini /app/
 ADD zimit.py /app/
 ADD crawler.js /app/
+ADD autoplay.js /app/
+
 
 RUN ln -s /app/zimit.py /usr/bin/zimit
 CMD ["zimit"]
diff --git a/autoplay.js b/autoplay.js
new file mode 100644
index 0000000..d36b958
--- /dev/null
+++ b/autoplay.js
@@ -0,0 +1,89 @@
+(() => {
+
+  function run() {
+    if (window.navigator.__crawler_autoplay) {
+      return;
+    }
+
+    //console.log("checking autoplay for " + document.location.href);
+    window.navigator.__crawler_autoplay = true;
+
+    const specialActions = [
+      {
+        rx: /w\.soundcloud\.com/,
+        check(url) {
+          const autoplay = url.searchParams.get('auto_play');
+          return autoplay === 'true';
+        },
+        handle(url) {
+          url.searchParams.set('auto_play', 'true');
+          // set continuous_play to true in order to handle
+          // a playlist etc
+          url.searchParams.set('continuous_play', 'true');
+          self.location.href = url.href;
+        },
+      },
+      {
+        rx: [/player\.vimeo\.com/, /youtube\.com\/embed\//],
+        check(url) {
+          const autoplay = url.searchParams.get('autoplay');
+          return autoplay === '1';
+        },
+        handle(url) {
+          url.searchParams.set('autoplay', '1');
+          if (window.__crawler_autoplayLoad) {
+            window.__crawler_autoplayLoad(url.href);
+          }
+          self.location.href = url.href;
+        },
+      },
+    ];
+    const url = new URL(self.location.href);
+    for (let i = 0; i < specialActions.length; i++) {
+      if (Array.isArray(specialActions[i].rx)) {
+        const rxs = specialActions[i].rx;
+        for (let j = 0; j < rxs.length; j++) {
+          if (url.href.search(rxs[j]) >= 0) {
+            if (specialActions[i].check(url)) return;
+            return specialActions[i].handle(url);
+          }
+        }
+      } else if (url.href.search(specialActions[i].rx) >= 0) {
+        if (specialActions[i].check(url)) return;
+        return specialActions[i].handle(url);
+      }
+    }
+  }
+
+  document.addEventListener("readystatechange", run);
+
+  if (document.readyState === "complete") {
+    run();
+  }
+
+
+  const mediaSet = new Set();
+
+  setInterval(() => {
+    const medias = document.querySelectorAll("video, audio");
+
+    for (const media of medias) {
+      try {
+        if (media.src && !mediaSet.has(media.src)) {
+          if (window.__crawler_queueUrls && (media.src.startsWith("http:") || media.src.startsWith("https:"))) {
+            window.__crawler_queueUrls(media.src);
+          }
+          mediaSet.add(media.src);
+        } else if (!media.src) {
+          media.play();
+        }
+      } catch(e) {
+        console.log(e);
+      }
+    }
+  }, 3000);
+
+
+
+})();
+
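autoplay.js above rewrites known embed URLs (SoundCloud, Vimeo, YouTube) so the player starts without a user gesture, then reloads the frame. A minimal sketch of that rewrite, runnable in plain Node; the video URL is a placeholder, not from the patch:

```js
// Sketch of the specialActions rewrite above (illustrative only; the video URL
// is a placeholder). If the embed URL does not already request autoplay, the
// parameter is added before the frame is reloaded with the new href.
const url = new URL("https://player.vimeo.com/video/123456789");

if (url.searchParams.get("autoplay") !== "1") {
  url.searchParams.set("autoplay", "1");
  // in autoplay.js this is followed by: self.location.href = url.href;
}

console.log(url.href); // https://player.vimeo.com/video/123456789?autoplay=1
```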
"networkidle0", "networkidle2"]; +const NEW_CONTEXT_OPTS = ["page", "session", "browser"]; const CHROME_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"; // to ignore HTTPS error for HEAD check @@ -24,30 +26,58 @@ process.once('SIGTERM', (code) => { }); +const autoplayScript = fs.readFileSync("./autoplay.js", "utf-8"); + + +// prefix for direct capture via pywb +const capturePrefix = `http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}/capture/record/id_/`; +const headers = {"User-Agent": CHROME_USER_AGENT}; + + async function run(params) { // Chrome Flags, including proxy server const args = [ "--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically) `--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`, - "--no-sandbox" + "--no-sandbox", + "--disable-background-media-suspend", + "--autoplay-policy=no-user-gesture-required", ]; - // prefix for direct capture via pywb - const capturePrefix = `http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}/capture/record/id_/`; - // Puppeter Options const puppeteerOptions = { headless: true, - //executablePath: "/usr/bin/google-chrome", + executablePath: "/opt/google/chrome/google-chrome", ignoreHTTPSErrors: true, args }; + // params + const { url, waitUntil, timeout, scope, limit, exclude, scroll, newContext } = params; + + let concurrency = Cluster.CONCURRENCY_PAGE; + + switch (newContext) { + case "page": + concurrency = Cluster.CONCURRENCY_PAGE; + break; + + case "session": + concurrency = Cluster.CONCURRENCY_CONTEXT; + break; + + case "browser": + concurrency = Cluster.CONCURRENCY_BROWSER; + break; + } + // Puppeteer Cluster init and options const cluster = await Cluster.launch({ - concurrency: Cluster.CONCURRENCY_PAGE, + concurrency, maxConcurrency: Number(params.workers) || 1, skipDuplicateUrls: true, + // total timeout for cluster + timeout: timeout * 2, puppeteerOptions, puppeteer, monitor: true @@ -56,9 +86,6 @@ async function run(params) { // Maintain own seen list const seenList = new Set(); - // params - const { url, waitUntil, timeout, scope, limit, exclude, scroll } = params; - //console.log("Limit: " + limit); // links crawled counter @@ -72,12 +99,46 @@ async function run(params) { return; } + //page.on('console', message => console.log(`${message.type()} ${message.text()}`)); + //page.on('pageerror', message => console.warn(message)); + //page.on('error', message => console.warn(message)); + //page.on('requestfailed', message => console.warn(message._failureText)); + const mediaResults = []; + + await page.exposeFunction('__crawler_queueUrls', (url) => { + mediaResults.push(directCapture(url)); + }); + + let waitForVideo = false; + + await page.exposeFunction('__crawler_autoplayLoad', (url) => { + console.log("*** Loading autoplay URL: " + url); + waitForVideo = true; + }); + + try { + await page.evaluateOnNewDocument(autoplayScript); + } catch(e) { + console.log(e); + } + try { await page.goto(url, {waitUntil, timeout}); } catch (e) { console.log(`Load timeout for ${url}`); } + try { + await Promise.all(mediaResults); + } catch (e) { + console.log(`Error loading media URLs`, e); + } + + if (waitForVideo) { + console.log("Extra wait 15s for video loading"); + await sleep(15000); + } + if (scroll) { try { await Promise.race([page.evaluate(autoScroll), sleep(30000)]); @@ -148,8 +209,18 @@ function shouldCrawl(scope, seenList, url, exclude) { return false; } - // if scope is provided, skip urls not in scope - if 
(scope && !url.startsWith(scope)) { + let inScope = false; + + // check scopes + for (const s of scope) { + if (s.exec(url)) { + inScope = true; + break; + } + } + + if (!inScope) { + //console.log(`Not in scope ${url} ${scope}`); return false; } @@ -166,8 +237,6 @@ function shouldCrawl(scope, seenList, url, exclude) { async function htmlCheck(url, capturePrefix) { try { - const headers = {"User-Agent": CHROME_USER_AGENT}; - const agent = url.startsWith("https:") ? HTTPS_AGENT : null; const resp = await fetch(url, {method: "HEAD", headers, agent}); @@ -191,11 +260,7 @@ async function htmlCheck(url, capturePrefix) { } // capture directly - console.log(`Direct capture: ${capturePrefix}${url}`); - const abort = new AbortController(); - const signal = abort.signal; - const resp2 = await fetch(capturePrefix + url, {signal, headers}); - abort.abort(); + await directCapture(url); return false; } catch(e) { @@ -205,6 +270,15 @@ async function htmlCheck(url, capturePrefix) { } } +async function directCapture(url) { + console.log(`Direct capture: ${capturePrefix}${url}`); + const abort = new AbortController(); + const signal = abort.signal; + const resp2 = await fetch(capturePrefix + url, {signal, headers}); + abort.abort(); +} + + async function autoScroll() { const canScrollMore = () => @@ -249,6 +323,12 @@ async function main() { type: "number", }, + "newContext": { + describe: "The context for each new capture, can be a new: page, session or browser.", + default: "page", + type: "string" + }, + "waitUntil": { describe: "Puppeteer page.goto() condition to wait for before continuing", default: "load", @@ -267,11 +347,11 @@ async function main() { }, "scope": { - describe: "The scope of current page that should be included in the crawl (defaults to the immediate directory of URL)", + describe: "Regex of page URLs that should be included in the crawl (defaults to the immediate directory of URL)", }, "exclude": { - describe: "Regex of URLs that should be excluded from the crawl." + describe: "Regex of page URLs that should be excluded from the crawl." 
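shouldCrawl() above now treats scope as an array of regular expressions instead of a single prefix string. A small sketch of the resulting check; the sample URLs and patterns are placeholders, and `.some()` stands in for the explicit loop in the patch:

```js
// Sketch of the new scope/exclude check: a URL is crawled only if at least one
// scope regex matches it and no exclude regex does.
function inScope(url, scope, exclude) {
  if (!scope.some((rx) => rx.exec(url))) {
    return false;
  }
  return !exclude.some((rx) => rx.exec(url));
}

const scope = [new RegExp("^https://example\\.com/docs/")];
const exclude = [new RegExp("\\?print=1")];

console.log(inScope("https://example.com/docs/page.html", scope, exclude)); // true
console.log(inScope("https://example.com/blog/post.html", scope, exclude)); // false
```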
@@ -291,7 +371,8 @@
      argv.url = url.href;
 
      if (!argv.scope) {
-        argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
+        //argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
+        argv.scope = [new RegExp("^" + rxEscape(url.href.slice(0, url.href.lastIndexOf("/") + 1)))];
      }
 
      argv.timeout *= 1000;
@@ -302,6 +383,11 @@
        throw new Error("Invalid waitUntil, must be one of: " + WAIT_UNTIL_OPTS.join(","));
      }
 
+      if (!NEW_CONTEXT_OPTS.includes(argv.newContext)) {
+        throw new Error("Invalid newContext, must be one of: " + NEW_CONTEXT_OPTS.join(","));
+      }
+
+      // Support one or multiple exclude
      if (argv.exclude) {
        if (typeof(argv.exclude) === "string") {
          argv.exclude = [new RegExp(argv.exclude)];
@@ -312,11 +398,23 @@
        argv.exclude = [];
      }
 
+      // Support one or multiple scopes
+      if (argv.scope) {
+        if (typeof(argv.scope) === "string") {
+          argv.scope = [new RegExp(argv.scope)];
+        } else {
+          argv.scope = argv.scope.map(e => new RegExp(e));
+        }
+      } else {
+        argv.scope = [];
+      }
+
      return true;
    })
    .argv;
 
  console.log("Exclusions Regexes: ", params.exclude);
+  console.log("Scope Regexes: ", params.scope);
 
  try {
    await run(params);
@@ -328,6 +426,11 @@
  }
 }
 
+function rxEscape(string) {
+  return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
+}
+
+
 main();
 
 
diff --git a/package.json b/package.json
index e991921..3820178 100644
--- a/package.json
+++ b/package.json
@@ -8,8 +8,8 @@
   "dependencies": {
     "abort-controller": "^3.0.0",
     "node-fetch": "^2.6.1",
-    "puppeteer": "^5.3.0",
     "puppeteer-cluster": "^0.22.0",
+    "puppeteer-core": "^5.3.1",
     "yargs": "^16.0.3"
   }
 }
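Replacing puppeteer with puppeteer-core means no Chromium is downloaded at install time, so the Chrome binary copied from the chrome stage has to be pointed to explicitly. A minimal launch sketch under that assumption, using the same path as the Dockerfile and puppeteerOptions above:

```js
const puppeteer = require("puppeteer-core");

// Sketch only: puppeteer-core ships no bundled browser, so launch() needs an
// explicit executablePath; here it is the Chrome copied in by the Dockerfile.
async function launchBrowser() {
  return puppeteer.launch({
    headless: true,
    executablePath: "/opt/google/chrome/google-chrome",
    args: ["--no-sandbox", "--autoplay-policy=no-user-gesture-required"],
  });
}

launchBrowser()
  .then((browser) => browser.close())
  .catch(console.error);
```

This is also why the Dockerfile now copies /opt/google/chrome/ from the chrome build stage.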
diff --git a/yarn.lock b/yarn.lock
index 458e423..91ceab7 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -8,9 +8,9 @@
   integrity sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ==
 
 "@types/node@*":
-  version "14.11.1"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.11.1.tgz#56af902ad157e763f9ba63d671c39cda3193c835"
-  integrity sha512-oTQgnd0hblfLsJ6BvJzzSL+Inogp3lq9fGgqRkMB/ziKMgEUaFl801OncOzUmalfzt14N0oPHMK47ipl+wbTIw==
+  version "14.14.0"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.0.tgz#f1091b6ad5de18e8e91bdbd43ec63f13de372538"
+  integrity sha512-BfbIHP9IapdupGhq/hc+jT5dyiBVZ2DdeC5WwJWQWDb0GijQlzUFAeIQn/2GtvZcd2HVUU7An8felIICFTC2qg==
 
 "@types/yauzl@^2.9.1":
   version "2.9.1"
@@ -247,11 +247,6 @@ locate-path@^5.0.0:
   dependencies:
     p-locate "^4.1.0"
 
-mime@^2.0.3:
-  version "2.4.6"
-  resolved "https://registry.yarnpkg.com/mime/-/mime-2.4.6.tgz#e5b407c90db442f2beb5b162373d07b69affa4d1"
-  integrity sha512-RZKhC3EmpBchfTGBVb8fb+RL2cWyw/32lshnsETttkBAyAUXSGHxbEJWWRXc751DrIxG1q04b8QwMbAwkRPpUA==
-
 minimatch@^3.0.4:
   version "3.0.4"
   resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
@@ -347,16 +342,15 @@ puppeteer-cluster@^0.22.0:
   dependencies:
     debug "^4.1.1"
 
-puppeteer@^5.3.0:
-  version "5.3.0"
-  resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-5.3.0.tgz#0abf83d0f2d1273baf2b56885a813f8052903e33"
-  integrity sha512-GjqMk5GRro3TO0sw3QMsF1H7n+/jaK2OW45qMvqjYUyJ7y4oA//9auy969HHhTG3HZXaMxY/NWXF/NXlAFIvtw==
+puppeteer-core@^5.3.1:
+  version "5.3.1"
+  resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-5.3.1.tgz#1affb1738afac499416a7fd4ed2ed0c18577e88f"
+  integrity sha512-YE6c6FvHAFKQUyNTqFs78SgGmpcqOPhhmVfEVNYB4abv7bV2V+B3r72T3e7vlJkEeTloy4x9bQLrGbHHoKSg1w==
   dependencies:
     debug "^4.1.0"
     devtools-protocol "0.0.799653"
     extract-zip "^2.0.0"
     https-proxy-agent "^4.0.0"
-    mime "^2.0.3"
     pkg-dir "^4.2.0"
     progress "^2.0.1"
     proxy-from-env "^1.0.0"
diff --git a/zimit.py b/zimit.py
index 30cec80..9272d5d 100755
--- a/zimit.py
+++ b/zimit.py
@@ -31,6 +31,13 @@ def zimit(args=None):
 
     parser.add_argument("-w", "--workers", type=int, help="Number of parallel workers")
 
+    parser.add_argument(
+        "--newContext",
+        help="The context for each new capture, can be a new: page, session or browser.",
+        choices=["page", "session", "browser"],
+        default="page",
+    )
+
     parser.add_argument(
         "--waitUntil",
         help="Puppeteer page.goto() condition to wait for before continuing",
@@ -51,11 +58,11 @@ def zimit(args=None):
 
     parser.add_argument(
         "--scope",
-        help="The scope of current page that should be included in the crawl (defaults to the immediate directory of the URL)",
+        help="Regex of page URLs that should be included in the crawl (defaults to the immediate directory of the URL)",
     )
 
     parser.add_argument(
-        "--exclude", help="Regex of URLs that should be excluded from the crawl."
+        "--exclude", help="Regex of page URLs that should be excluded from the crawl."
     )
 
     parser.add_argument(
@@ -150,6 +157,7 @@ def get_node_cmd_line(args):
     for arg in [
         "url",
         "workers",
+        "newContext",
         "waitUntil",
         "limit",
         "timeout",
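zimit.py only validates --newContext and forwards it to the Node crawler; crawler.js then maps the value onto puppeteer-cluster's isolation levels. A sketch of that mapping, using the same constants as the switch statement in run():

```js
const { Cluster } = require("puppeteer-cluster");

// Sketch of the --newContext mapping used in run() above:
//   page    -> new tab per task, shared browser and session
//   session -> new incognito browser context per task
//   browser -> separate browser instance per task
const CONCURRENCY_FOR_CONTEXT = {
  page: Cluster.CONCURRENCY_PAGE,
  session: Cluster.CONCURRENCY_CONTEXT,
  browser: Cluster.CONCURRENCY_BROWSER,
};

function concurrencyFor(newContext) {
  if (!(newContext in CONCURRENCY_FOR_CONTEXT)) {
    throw new Error("Invalid newContext, must be one of: page,session,browser");
  }
  return CONCURRENCY_FOR_CONTEXT[newContext];
}

console.log(concurrencyFor("session") === Cluster.CONCURRENCY_CONTEXT); // true
```

The stronger the isolation, the higher the memory cost: "browser" launches a full Chrome per worker, while "page" keeps all workers in one shared browser and session.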