From 3946ff7162e3cbeb7de49c48bf5aed346ac46ed0 Mon Sep 17 00:00:00 2001 From: Jaifroid Date: Wed, 1 Jun 2022 06:17:36 +0100 Subject: [PATCH] Make code more efficient by defining regexes once Former-commit-id: 9a068d12938d73ac7bbc31b3564da3e92411811f [formerly 3bcbe71717fa80b39f370057bd94901fedea4255 [formerly badd9f626aaddd9d4b32c773ac25e43e46d47c93]] Former-commit-id: 33d076f9a1e8bd4b8b5a42bb5f93a09447c66985 [formerly 7b39a65d148b54fb801523d09be46504de948f2a] Former-commit-id: 5b3c41f3c926176e0a2c206714116e2f1763ff2b --- www/js/lib/transformZimit.js | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/www/js/lib/transformZimit.js b/www/js/lib/transformZimit.js index 2cd64564..7bd9b1ea 100644 --- a/www/js/lib/transformZimit.js +++ b/www/js/lib/transformZimit.js @@ -72,6 +72,13 @@ define([], function () { return dirEntry; } + /** + * Establish some Regular Expressions used by the transformReplayUrls function + */ + var regexpZimitHtmlLinks = /(<(?:a|img|script|link|track|meta)\b[^>]*?[\s;])(?:src\b|href|url)\s*(=\s*(["']))(?=\/|https?:\/\/)((?:[^>](?!\3|\?|#))+[^>])([^>]*>)/ig; + var regexpZimitJavascriptLinks = /['"(]((?:https?:)?\/\/[^'"?#)]+)['"?#)]/ig; + var regexpZimitCssLinks = /\burl\s*\(['"\s]*([^)'"\s]+)['"\s]*\)/ig; + /** * The main function for transforming Zimit URLs into standard ZIM URLs. 
* @param {dirEntry} dirEntry The directory entry that points to the extracted data @@ -83,16 +90,16 @@ define([], function () { function transformReplayUrls(dirEntry, data, mimetype, selectedArchive) { /** * Transform URL links in HTML files - * Note that some Zimit ZIMs have mimteypes like 'text/html;raw=true', so we can't simply match 'text/html' - * Other ZIMs have mimetype like 'html' (with no 'text/'), so we have to match as generically as possible + * Note that some Zimit ZIMs have mimetypes like 'text/html;raw=true', so we can't simply match 'text/html' + * Other ZIMs have a mimetype like 'html' (with no 'text/'), so we have to match as generically as possible */ if (/\bhtml\b/i.test(mimetype)) { // var zimitPrefix = data.match(/link\s+rel=["']canonical["']\s+href=(['"])https?:\/\/([^\/]+)(.+?)\1/i); zimitPrefix = zimitPrefix ? zimitPrefix[2] : params.zimitPrefix; // Remove lazyimgage system and noscript tags that comment out images + // DEV: Check if this is still necessary data = data.replace(/