mirror of
https://github.com/kiwix/kiwix-js-pwa.git
synced 2025-09-13 14:28:38 -04:00
Make code more efficient by defining regexes once
Former-commit-id: 9a068d12938d73ac7bbc31b3564da3e92411811f [formerly 3bcbe71717fa80b39f370057bd94901fedea4255 [formerly badd9f626aaddd9d4b32c773ac25e43e46d47c93]] Former-commit-id: 33d076f9a1e8bd4b8b5a42bb5f93a09447c66985 [formerly 7b39a65d148b54fb801523d09be46504de948f2a] Former-commit-id: 5b3c41f3c926176e0a2c206714116e2f1763ff2b
This commit is contained in:
parent
88ea328cc9
commit
3946ff7162
@ -72,6 +72,13 @@ define([], function () {
|
|||||||
return dirEntry;
|
return dirEntry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Establish some Regular Expressions used by the transformReplayUrls function
|
||||||
|
*/
|
||||||
|
var regexpZimitHtmlLinks = /(<(?:a|img|script|link|track|meta)\b[^>]*?[\s;])(?:src\b|href|url)\s*(=\s*(["']))(?=\/|https?:\/\/)((?:[^>](?!\3|\?|#))+[^>])([^>]*>)/ig;
|
||||||
|
var regexpZimitJavascriptLinks = /['"(]((?:https?:)?\/\/[^'"?#)]+)['"?#)]/ig;
|
||||||
|
var regexpZimitCssLinks = /\burl\s*\(['"\s]*([^)'"\s]+)['"\s]*\)/ig;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The main function for transforming Zimit URLs into standard ZIM URLs.
|
* The main function for transforming Zimit URLs into standard ZIM URLs.
|
||||||
* @param {dirEntry} dirEntry The directory entry that points to the extracted data
|
* @param {dirEntry} dirEntry The directory entry that points to the extracted data
|
||||||
@ -83,16 +90,16 @@ define([], function () {
|
|||||||
function transformReplayUrls(dirEntry, data, mimetype, selectedArchive) {
|
function transformReplayUrls(dirEntry, data, mimetype, selectedArchive) {
|
||||||
/**
|
/**
|
||||||
* Transform URL links in HTML files
|
* Transform URL links in HTML files
|
||||||
* Note that some Zimit ZIMs have mimteypes like 'text/html;raw=true', so we can't simply match 'text/html'
|
* Note that some Zimit ZIMs have mimetypes like 'text/html;raw=true', so we can't simply match 'text/html'
|
||||||
* Other ZIMs have mimetype like 'html' (with no 'text/'), so we have to match as generically as possible
|
* Other ZIMs have a mimetype like 'html' (with no 'text/'), so we have to match as generically as possible
|
||||||
*/
|
*/
|
||||||
if (/\bhtml\b/i.test(mimetype)) { //
|
if (/\bhtml\b/i.test(mimetype)) { //
|
||||||
var zimitPrefix = data.match(/link\s+rel=["']canonical["']\s+href=(['"])https?:\/\/([^\/]+)(.+?)\1/i);
|
var zimitPrefix = data.match(/link\s+rel=["']canonical["']\s+href=(['"])https?:\/\/([^\/]+)(.+?)\1/i);
|
||||||
zimitPrefix = zimitPrefix ? zimitPrefix[2] : params.zimitPrefix;
|
zimitPrefix = zimitPrefix ? zimitPrefix[2] : params.zimitPrefix;
|
||||||
// Remove lazyimage system and noscript tags that comment out images
|
// Remove lazyimage system and noscript tags that comment out images
|
||||||
|
// DEV: Check if this is still necessary
|
||||||
data = data.replace(/<noscript>\s*(<img\b[^>]+>)\s*<\/noscript>/ig, '$1');
|
data = data.replace(/<noscript>\s*(<img\b[^>]+>)\s*<\/noscript>/ig, '$1');
|
||||||
data = data.replace(/<span\b[^>]+lazy-image-placeholder[^<]+<\/span>\s*/ig, '');
|
data = data.replace(/<span\b[^>]+lazy-image-placeholder[^<]+<\/span>\s*/ig, '');
|
||||||
var regexpZimitHtmlLinks = /(<(?:a|img|script|link|track|meta)\b[^>]*?[\s;])(?:src\b|href|url)\s*(=\s*(["']))(?=\/|https?:\/\/)((?:[^>](?!\3|\?|#))+[^>])([^>]*>)/ig;
|
|
||||||
// Get stem for constructing an absolute URL
|
// Get stem for constructing an absolute URL
|
||||||
var indexRoot = window.location.pathname.replace(/[^\/]+$/, '') + encodeURI(selectedArchive._file.name);
|
var indexRoot = window.location.pathname.replace(/[^\/]+$/, '') + encodeURI(selectedArchive._file.name);
|
||||||
data = data.replace(regexpZimitHtmlLinks, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
|
data = data.replace(regexpZimitHtmlLinks, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
|
||||||
@ -133,13 +140,12 @@ define([], function () {
|
|||||||
return assetUrl;
|
return assetUrl;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
// Remove any <base href...> statements
|
// Remove any <base href...> statements
|
||||||
// DEV: You should probably deal with this more intelligently, changing absolute links rather than just removing
|
// DEV: You should probably deal with this more intelligently, changing absolute links rather than just removing,
|
||||||
// but so far, removing it seems to do the job
|
// but so far, removing it seems to do the job
|
||||||
data = data.replace(/<base\b[^>]+href\b[^>]+>\s*/i, '');
|
data = data.replace(/<base\b[^>]+href\b[^>]+>\s*/i, '');
|
||||||
|
|
||||||
// Remove any residual analytics
|
// Remove any residual analytics and ads
|
||||||
data = data.replace(/<script\b([^<]|<(?!\/script>))+?(?:google.*?analytics|adsbygoogle)([^<]|<(?!\/script>))+<\/script>\s*/i, '');
|
data = data.replace(/<script\b([^<]|<(?!\/script>))+?(?:google.*?analytics|adsbygoogle)([^<]|<(?!\/script>))+<\/script>\s*/i, '');
|
||||||
data = data.replace(/<ins\b(?:[^<]|<(?!\/ins>))+?adsbygoogle(?:[^<]|<(?!\/ins>))+<\/ins>\s*/ig, '');
|
data = data.replace(/<ins\b(?:[^<]|<(?!\/ins>))+?adsbygoogle(?:[^<]|<(?!\/ins>))+<\/ins>\s*/ig, '');
|
||||||
|
|
||||||
@ -170,7 +176,6 @@ define([], function () {
|
|||||||
* Transform css-style links in stylesheet files and stylesheet blocks in HTML
|
* Transform css-style links in stylesheet files and stylesheet blocks in HTML
|
||||||
*/
|
*/
|
||||||
if (/\b(css|html)\b/i.test(mimetype)) {
|
if (/\b(css|html)\b/i.test(mimetype)) {
|
||||||
var regexpZimitCssLinks = /\burl\s*\(['"\s]*([^)'"\s]+)['"\s]*\)/ig;
|
|
||||||
data = data.replace(regexpZimitCssLinks, function (match, url) {
|
data = data.replace(regexpZimitCssLinks, function (match, url) {
|
||||||
var newBlock = match;
|
var newBlock = match;
|
||||||
var assetUrl = url;
|
var assetUrl = url;
|
||||||
@ -195,7 +200,6 @@ define([], function () {
|
|||||||
* Transform links in JavaScript files or script blocks in the html
|
* Transform links in JavaScript files or script blocks in the html
|
||||||
*/
|
*/
|
||||||
if (/\b(javascript|html)\b/i.test(mimetype)) {
|
if (/\b(javascript|html)\b/i.test(mimetype)) {
|
||||||
var regexpZimitJavascriptLinks = /['"(]((?:https?:)?\/\/[^'"?#)]+)['"?#)]/ig;
|
|
||||||
data = data.replace(regexpZimitJavascriptLinks, function (match, url) {
|
data = data.replace(regexpZimitJavascriptLinks, function (match, url) {
|
||||||
var newBlock = match;
|
var newBlock = match;
|
||||||
var assetUrl = url;
|
var assetUrl = url;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user