diff --git a/Dockerfile b/Dockerfile
index 432b827..0d66ed8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,20 +8,14 @@ ENV PROXY_HOST=localhost \
     PROXY_CA_FILE=/tmp/proxy-ca.pem \
     NO_SOCAT=1

-RUN pip install pywb uwsgi warc2zim
+RUN pip install pywb uwsgi

 # force reinstall of gevent to prevent segfault on uwsgi worker
 RUN pip install -U gevent

-#COPY --from=chrome /opt/google/chrome/ /opt/google/chrome/
-
-#COPY --from=chrome /app/ /browser/
+RUN pip install warc2zim==1.0.1
+
 COPY --from=chrome /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/
 COPY --from=chrome /lib/x86_64-linux-gnu/libdbus* /lib/x86_64-linux-gnu/
-#COPY --from=chrome /usr/bin/run_forever /usr/bin/
-#COPY --from=chrome /usr/bin/wget /usr/bin/
-#COPY --from=chrome /usr/bin/certutil /usr/bin/
-
-#RUN ln -s /opt/google/chrome/google-chrome /usr/bin/google-chrome

 RUN useradd zimit --shell /bin/bash --create-home \
     && usermod -a -G sudo zimit \
@@ -34,7 +28,7 @@ ADD package.json /app/

 RUN chown -R zimit /app

-#USER zimit
+RUN apt-get update && apt-get install -qqy fonts-stix

 RUN yarn install
diff --git a/README.md b/README.md
index f42d6df..b62cc7e 100644
--- a/README.md
+++ b/README.md
@@ -25,20 +25,21 @@ docker build -t openzim/zimit .

 The image accepts the following parameters:

-- `URL` - the url to be crawled (required)
+- `--url URL` - the url to be crawled (required)
 - `--workers N` - number of crawl workers to be run in parallel
 - `--wait-until` - Puppeteer setting for how long to wait for page load. See [page.goto waitUntil options](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options). The default is `load`, but for static sites, `--wait-until domcontentloaded` may be used to speed up the crawl (to avoid waiting for ads to load for example).
 - `--name` - Name of ZIM file (defaults to the hostname of the URL)
 - `--output` - output directory (defaults to `/output`)
 - `--limit U` - Limit capture to at most U URLs
 - `--exclude` - skip URLs that match the regex from crawling. Can be specified multiple times.
+- `--scroll` - if set, will activate a simple auto-scroll behavior on each page.

 The following is an example usage. The `--cap-add` and `--shm-size` flags are needed to run Chrome in Docker.

 Example command:

 ```
-docker run -v /output:/output --cap-add=SYS_ADMIN --cap-add=NET_ADMIN --shm-size=1gb openzim/zimit URL --name myzimfile --workers 2 --wait-until domcontentloaded
+docker run -v /output:/output --cap-add=SYS_ADMIN --cap-add=NET_ADMIN --shm-size=1gb openzim/zimit --url URL --name myzimfile --workers 2 --wait-until domcontentloaded
 ```

 The puppeteer-cluster provides monitoring output which is enabled by default and prints the crawl status to the Docker log.
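Reviewer note: the README now documents that `--exclude` may be repeated. As an illustrative sketch only (the example patterns and URLs are hypothetical, and the `some()` matching is an assumption about how `shouldCrawl` applies the filters), this mirrors the RegExp compilation added in the yargs `check()` in index.js below:

```
// yargs yields a string for a single --exclude and an array for several,
// hence the typeof branch before compiling RegExp filters.
let exclude = ["\\?print=1", "/login"]; // e.g. --exclude "\?print=1" --exclude /login
if (typeof exclude === "string") {
  exclude = [new RegExp(exclude)];
} else {
  exclude = exclude.map((e) => new RegExp(e));
}

// A candidate link is presumably skipped if any pattern matches it.
const skip = (url) => exclude.some((re) => re.test(url));
console.log(skip("https://example.com/login"));   // true  -> not crawled
console.log(skip("https://example.com/article")); // false -> crawled
```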
diff --git a/config.yaml b/config.yaml
index a072f5a..7f16e3c 100644
--- a/config.yaml
+++ b/config.yaml
@@ -11,7 +11,7 @@ recorder: live

 #autoindex: 10

-#enable_auto_fetch: true
+enable_auto_fetch: true

 collections:
     live: $live
diff --git a/index.js b/index.js
index 0f4103b..83c0c91 100644
--- a/index.js
+++ b/index.js
@@ -5,6 +5,8 @@
 const fetch = require("node-fetch");
 const AbortController = require("abort-controller");

 const HTML_TYPES = ["text/html", "application/xhtml", "application/xhtml+xml"];
+const WAIT_UNTIL_OPTS = ["load", "domcontentloaded", "networkidle0", "networkidle2"];
+const CHROME_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36";

 async function run(params) {
@@ -36,32 +38,10 @@
   });

   // Maintain own seen list
-  let seenList = new Set();
-  const url = params._[0];
+  const seenList = new Set();

-  let { waitUntil, timeout, scope, limit, exclude } = params;
-
-  // waitUntil condition (see: https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options)
-  waitUntil = waitUntil || "load";
-
-  // Timeout per page
-  timeout = Number(timeout) || 60000;
-
-  // Scope for crawl, default to the domain of the URL
-  scope = scope || new URL(url).origin + "/";
-
-  // Limit number of pages captured
-  limit = Number(limit) || 0;
-
-  if (exclude) {
-    if (typeof(exclude) === "string") {
-      exclude = [new RegExp(exclude)];
-    } else {
-      exclude = exclude.map(e => new RegExp(e));
-    }
-  } else {
-    exclude = [];
-  }
+  // params
+  const { url, waitUntil, timeout, scope, limit, exclude, scroll } = params;

   console.log("Limit: " + limit);
@@ -82,6 +62,14 @@
     console.log(`Load timeout for ${url}`);
   }

+  if (scroll) {
+    try {
+      await Promise.race([page.evaluate(autoScroll), sleep(30000)]);
+    } catch (e) {
+      console.warn("Behavior Failed", e);
+    }
+  }
+
   let results = null;

   try {
@@ -119,7 +107,7 @@
   const zimName = params.name || new URL(url).hostname;
   const zimOutput = params.output || "/output";

-  const warc2zim = `warc2zim --url ${url} --name ${zimName} --output ${zimOutput} ./collections/capture/archive/\*.warc.gz`;
+  const warc2zim = `warc2zim -a --url ${url} --name ${zimName} --output ${zimOutput} ./collections/capture/archive/\*.warc.gz`;

   console.log("Running: " + warc2zim);
@@ -169,7 +157,9 @@ function shouldCrawl(scope, seenList, url, exclude) {

 async function htmlCheck(url, capturePrefix) {
   try {
-    const resp = await fetch(url, {method: "HEAD"});
+    const headers = {"User-Agent": CHROME_USER_AGENT};
+
+    const resp = await fetch(url, {method: "HEAD", headers});

     if (resp.status >= 400) {
       console.log(`Skipping ${url}, invalid status ${resp.status}`);
@@ -193,7 +183,7 @@
     console.log(`Direct capture: ${capturePrefix}${url}`);
     const abort = new AbortController();
     const signal = abort.signal;
-    const resp2 = await fetch(capturePrefix + url, {signal});
+    const resp2 = await fetch(capturePrefix + url, {signal, headers});
     abort.abort();

     return false;
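Reviewer note: the two `fetch()` changes above make the HEAD probe and the direct-capture request present the same Chrome User-Agent the browser uses, so servers that vary responses by UA see consistent requests. A minimal standalone sketch of the probe pattern, assuming node-fetch (the helper name is hypothetical):

```
const fetch = require("node-fetch");

const HTML_TYPES = ["text/html", "application/xhtml", "application/xhtml+xml"];
const CHROME_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36";

// Hypothetical helper: HEAD-request a URL with the Chrome UA and report
// whether the response looks like an HTML page worth loading in the browser.
async function probeIsHtml(url) {
  const headers = {"User-Agent": CHROME_USER_AGENT};
  const resp = await fetch(url, {method: "HEAD", headers});
  const mime = (resp.headers.get("Content-Type") || "").split(";")[0].trim();
  return resp.status < 400 && HTML_TYPES.includes(mime);
}
```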
@@ -205,9 +195,111 @@
 }


+async function autoScroll() {
+  const canScrollMore = () =>
+    self.scrollY + self.innerHeight <
+    Math.max(
+      self.document.body.scrollHeight,
+      self.document.body.offsetHeight,
+      self.document.documentElement.clientHeight,
+      self.document.documentElement.scrollHeight,
+      self.document.documentElement.offsetHeight
+    );
+
+  const scrollOpts = { top: 250, left: 0, behavior: 'auto' };
+
+  while (canScrollMore()) {
+    self.scrollBy(scrollOpts);
+    await new Promise(resolve => setTimeout(resolve, 500));
+  }
+}
+
+function sleep(time) {
+  return new Promise(resolve => setTimeout(resolve, time));
+}
+
+
 async function main() {
-  const params = require('yargs').argv;
-  console.log(params);
+  const params = require('yargs')
+    .usage("zimit [options]")
+    .options({
+      "url": {
+        alias: "u",
+        describe: "The URL to start crawling from and main page for ZIM",
+        demandOption: true,
+        type: "string",
+      },
+
+      "workers": {
+        alias: "w",
+        describe: "The number of workers to run in parallel",
+        demandOption: false,
+        default: 1,
+        type: "number",
+      },
+
+      "waitUntil": {
+        describe: "Puppeteer page.goto() condition to wait for before continuing",
+        default: "load",
+      },
+
+      "limit": {
+        describe: "Limit crawl to this number of pages",
+        default: 0,
+        type: "number",
+      },
+
+      "timeout": {
+        describe: "Timeout for each page to load (in millis)",
+        default: 30000,
+        type: "number",
+      },
+
+      "scope": {
+        describe: "The scope of current page that should be included in the crawl (defaults to the domain of URL)",
+      },
+
+      "exclude": {
+        describe: "Regex of URLs that should be excluded from the crawl."
+      },
+
+      "scroll": {
+        describe: "If set, will autoscroll to bottom of the page",
+        type: "boolean",
+        default: false,
+      }}).check((argv, option) => {
+        // Scope for crawl, default to the domain of the URL
+        const url = new URL(argv.url);
+
+        if (url.protocol !== "http:" && url.protocol != "https:") {
+          throw new Error("URL must start with http:// or https://");
+        }
+
+        if (!argv.scope) {
+          argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
+        }
+
+        // waitUntil condition must be: load, domcontentloaded, networkidle0, networkidle2
+        // (see: https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options)
+        if (!WAIT_UNTIL_OPTS.includes(argv.waitUntil)) {
+          throw new Error("Invalid waitUntil, must be one of: " + WAIT_UNTIL_OPTS.join(","));
+        }
+
+        if (argv.exclude) {
+          if (typeof(argv.exclude) === "string") {
+            argv.exclude = [new RegExp(argv.exclude)];
+          } else {
+            argv.exclude = argv.exclude.map(e => new RegExp(e));
+          }
+        } else {
+          argv.exclude = [];
+        }
+
+        return true;
+      })
+    .argv;
+
+  console.log("params", params);

   try {
     await run(params);
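Reviewer note: one behavioral change in the new `check()` validation is that the default scope is no longer the URL origin but everything up to the last `/` of the start URL. A quick sketch of what that computes (the example URL is hypothetical):

```
// Scope default from check() above: keep everything up to and including
// the last "/" of the start URL (vs. the old `new URL(url).origin + "/"`).
const url = new URL("https://example.com/docs/page.html");
const scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
console.log(scope); // "https://example.com/docs/"
// The old default would have been "https://example.com/"
```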