Make code more efficient by defining regexes once

Former-commit-id: 9a068d12938d73ac7bbc31b3564da3e92411811f [formerly 3bcbe71717fa80b39f370057bd94901fedea4255 [formerly badd9f626aaddd9d4b32c773ac25e43e46d47c93]] Former-commit-id: 33d076f9a1e8bd4b8b5a42bb5f93a09447c66985 [formerly 7b39a65d148b54fb801523d09be46504de948f2a] Former-commit-id: 5b3c41f3c926176e0a2c206714116e2f1763ff2b
2025-09-13 14:28:38 -04:00 · 2022-06-01 06:17:36 +01:00 · 2022-06-01 06:17:36 +01:00 · 3946ff7162
commit 3946ff7162
parent 88ea328cc9
1 changed files with 12 additions and 8 deletions
--- a/www/js/lib/transformZimit.js
+++ b/www/js/lib/transformZimit.js
@ -72,6 +72,13 @@ define([], function () {
        return dirEntry;
    }
    /**
     * Establish some Regular Expressions used by the transformReplayUrls function.
     * They are declared once at this scope so the literals are not re-created on every call.
     *
     * All three carry the `g` flag and are therefore stateful objects (they have a lastIndex).
     * In the code visible here they are only passed to String.prototype.replace, which starts
     * a global search from index 0 each time, so sharing them between calls is safe for that use.
     * NOTE(review): if one of these is ever used with .test() or .exec(), reset lastIndex first.
     */
    // Matches an opening <a>, <img>, <script>, <link>, <track> or <meta> tag that has a quoted
    // src / href / url attribute whose value begins with "/" or "http(s)://". Capture groups:
    //   1: start of the tag up to the whitespace or ";" that precedes the attribute name
    //   2: the "=" sign, any whitespace and the opening quote
    //   3: the quote character itself (back-referenced to find the end of the value)
    //   4: the URL, stopping before the first closing quote, "?" or "#" (at least two characters)
    //   5: the rest of the tag up to and including the closing ">"
    var regexpZimitHtmlLinks = /(<(?:a|img|script|link|track|meta)\b[^>]*?[\s;])(?:src\b|href|url)\s*(=\s*(["']))(?=\/|https?:\/\/)((?:[^>](?!\3|\?|#))+[^>])([^>]*>)/ig;
    // Matches an absolute or protocol-relative URL ("http://", "https://" or "//") that directly
    // follows a quote or "(" and is terminated by a quote, "?", "#" or ")".
    // Capture group 1 is the URL without the surrounding delimiter characters.
    var regexpZimitJavascriptLinks = /['"(]((?:https?:)?\/\/[^'"?#)]+)['"?#)]/ig;
    // Matches a CSS url(...) expression, allowing optional quotes and whitespace around the value.
    // Capture group 1 is the bare URL (it contains no ")", quote or whitespace characters).
    var regexpZimitCssLinks = /\burl\s*\(['"\s]*([^)'"\s]+)['"\s]*\)/ig;
    /**
     * The main function for transforming Zimit URLs into standard ZIM URLs.
     * @param {dirEntry} dirEntry The directory entry that points to the extracted data
@ -83,16 +90,16 @@ define([], function () {
    function transformReplayUrls(dirEntry, data, mimetype, selectedArchive) {
        /**
         * Transform URL links in HTML files
-         * Note that some Zimit ZIMs have mimteypes like 'text/html;raw=true', so we can't simply match 'text/html'
+         * Note that some Zimit ZIMs have mimetypes like 'text/html;raw=true', so we can't simply match 'text/html'
-         * Other ZIMs have mimetype like 'html' (with no 'text/'), so we have to match as generically as possible
+         * Other ZIMs have a mimetype like 'html' (with no 'text/'), so we have to match as generically as possible
         */
        if (/\bhtml\b/i.test(mimetype)) { // 
            var zimitPrefix = data.match(/link\s+rel=["']canonical["']\s+href=(['"])https?:\/\/([^\/]+)(.+?)\1/i);
            zimitPrefix = zimitPrefix ? zimitPrefix[2] : params.zimitPrefix;
            // Remove lazy-image system and noscript tags that comment out images
            // DEV: Check if this is still necessary
            data = data.replace(/<noscript>\s*(<img\b[^>]+>)\s*<\/noscript>/ig, '$1');
            data = data.replace(/<span\b[^>]+lazy-image-placeholder[^<]+<\/span>\s*/ig, '');
            var regexpZimitHtmlLinks = /(<(?:a|img|script|link|track|meta)\b[^>]*?[\s;])(?:src\b|href|url)\s*(=\s*(["']))(?=\/|https?:\/\/)((?:[^>](?!\3|\?|#))+[^>])([^>]*>)/ig;
            // Get stem for constructing an absolute URL
            var indexRoot = window.location.pathname.replace(/[^\/]+$/, '') + encodeURI(selectedArchive._file.name);
            data = data.replace(regexpZimitHtmlLinks, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
@ -133,13 +140,12 @@ define([], function () {
                return assetUrl;
            });
            // Remove any <base href...> statements
-            // DEV: You should probably deal with this more intelligently, changing absolute links rather than just removing
+            // DEV: You should probably deal with this more intelligently, changing absolute links rather than just removing,
            // but so far, removing it seems to do the job
            data = data.replace(/<base\b[^>]+href\b[^>]+>\s*/i, '');
-            // Remove any residual analytics
+            // Remove any residual analytics and ads
            data = data.replace(/<script\b([^<]|<(?!\/script>))+?(?:google.*?analytics|adsbygoogle)([^<]|<(?!\/script>))+<\/script>\s*/i, '');
            data = data.replace(/<ins\b(?:[^<]|<(?!\/ins>))+?adsbygoogle(?:[^<]|<(?!\/ins>))+<\/ins>\s*/ig, '');
@ -170,7 +176,6 @@ define([], function () {
         * Transform css-style links in stylesheet files and stylesheet blocks in HTML
         */
        if (/\b(css|html)\b/i.test(mimetype)) {
            var regexpZimitCssLinks = /\burl\s*\(['"\s]*([^)'"\s]+)['"\s]*\)/ig;
            data = data.replace(regexpZimitCssLinks, function (match, url) {
                var newBlock = match;
                var assetUrl = url;
@ -195,7 +200,6 @@ define([], function () {
         * Transform links in JavaScript files or script blocks in the html
         */
        if (/\b(javascript|html)\b/i.test(mimetype)) {
            var regexpZimitJavascriptLinks = /['"(]((?:https?:)?\/\/[^'"?#)]+)['"?#)]/ig;
            data = data.replace(regexpZimitJavascriptLinks, function (match, url) {
                var newBlock = match;
                var assetUrl = url;