diff --git a/www/js/lib/transformZimit.js b/www/js/lib/transformZimit.js index 2cd64564..7bd9b1ea 100644 --- a/www/js/lib/transformZimit.js +++ b/www/js/lib/transformZimit.js @@ -72,6 +72,13 @@ define([], function () { return dirEntry; } + /** + * Establish some Regular Expressions used by the transformReplayUrls function + */ + var regexpZimitHtmlLinks = /(<(?:a|img|script|link|track|meta)\b[^>]*?[\s;])(?:src\b|href|url)\s*(=\s*(["']))(?=\/|https?:\/\/)((?:[^>](?!\3|\?|#))+[^>])([^>]*>)/ig; + var regexpZimitJavascriptLinks = /['"(]((?:https?:)?\/\/[^'"?#)]+)['"?#)]/ig; + var regexpZimitCssLinks = /\burl\s*\(['"\s]*([^)'"\s]+)['"\s]*\)/ig; + /** * The main function for transforming Zimit URLs into standard ZIM URLs. * @param {dirEntry} dirEntry The directory entry that points to the extracted data @@ -83,16 +90,16 @@ define([], function () { function transformReplayUrls(dirEntry, data, mimetype, selectedArchive) { /** * Transform URL links in HTML files - * Note that some Zimit ZIMs have mimteypes like 'text/html;raw=true', so we can't simply match 'text/html' - * Other ZIMs have mimetype like 'html' (with no 'text/'), so we have to match as generically as possible + * Note that some Zimit ZIMs have mimetypes like 'text/html;raw=true', so we can't simply match 'text/html' + * Other ZIMs have a mimetype like 'html' (with no 'text/'), so we have to match as generically as possible */ if (/\bhtml\b/i.test(mimetype)) { // var zimitPrefix = data.match(/link\s+rel=["']canonical["']\s+href=(['"])https?:\/\/([^\/]+)(.+?)\1/i); zimitPrefix = zimitPrefix ? zimitPrefix[2] : params.zimitPrefix; // Remove lazyimgage system and noscript tags that comment out images + // DEV: Check if this is still necessary data = data.replace(/