diff --git a/index.js b/index.js
index a49beeb..a5a2103 100644
--- a/index.js
+++ b/index.js
@@ -3,12 +3,13 @@ const { Cluster } = require("puppeteer-cluster");
 const child_process = require("child_process");
 
 async function run(params) {
+  // Chrome Flags, including proxy server
   const args = [
-    "--no-first-run",
-    "--no-xshm",
+    "--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically)
     `--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`
   ];
 
+  // Puppeteer Options
   const puppeteerOptions = {
     headless: true,
     executablePath: "/usr/bin/google-chrome",
@@ -16,6 +17,7 @@ async function run(params) {
     args
   };
 
+  // Puppeteer Cluster init and options
   const cluster = await Cluster.launch({
     concurrency: Cluster.CONCURRENCY_PAGE,
     maxConcurrency: Number(params.workers) || 1,
@@ -25,14 +27,22 @@ async function run(params) {
     monitor: true
   });
 
+  // Maintain own seen list
   let seenList = new Set();
   const url = params._[0];
 
   let { waitUntil, timeout, scope } = params;
+
+  // waitUntil condition (see: https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options)
   waitUntil = waitUntil || "load";
+
+  // Timeout per page
   timeout = timeout || 60000;
+
+  // Scope prefix for the crawl; defaults to the origin (scheme + host + port) of the seed URL plus "/"
   scope = scope || new URL(url).origin + "/";
 
+  // Crawl Task
   cluster.task(async ({page, data}) => {
     const {url} = data;
 
@@ -48,13 +58,10 @@ async function run(params) {
       });
 
       for (data of result) {
-        if (seenList.has(data.url)) {
-          continue;
-        }
-        //console.log(`check ${data.url} in ${allowedDomain}`);
-        if (scope && data.url.startsWith(scope)) {
-          seenList.add(data.url);
-          cluster.queue({url: data.url});
+        const newUrl = shouldCrawl(scope, seenList, data.url);
+        if (newUrl) {
+          seenList.add(newUrl);
+          cluster.queue({url: newUrl});
         }
       }
     } catch (e) {
@@ -81,6 +88,37 @@ async function run(params) {
 }
 
 
+function shouldCrawl(scope, seenList, url) { // returns the normalized URL string to queue, or false to skip it
+  try {
+    url = new URL(url); // parse; the parameter is rebound from a string to a URL object here
+  } catch(e) {
+    return false; // the URL constructor threw, so this is not a usable absolute URL
+  }
+
+  // drop the fragment so URLs that differ only by "#..." compare equal in the seen list
+  url.hash = "";
+
+  // only queue http/https URLs (URL.protocol includes the trailing colon)
+  if (url.protocol != "http:" && url.protocol != "https:") {
+    return false;
+  }
+
+  url = url.href; // back to a string: the serialized URL, fragment already removed
+
+  // skip URLs already present in seenList (this function only reads it, never adds)
+  if (seenList.has(url)) {
+    return false;
+  }
+
+  // if scope is provided, skip urls that do not start with the scope prefix
+  if (scope && !url.startsWith(scope)) {
+    return false;
+  }
+
+  return url;
+}
+
+
 async function main() {
   const params = require('yargs').argv;
   console.log(params);