Enable limited support for Zimit ZIMs #249 (#248)

Former-commit-id: 7cec2025879b33b13fdec63c5ade01938ba42bab [formerly fb65802f6075bfd2de2f00c636b1c9f59b07a51f] [formerly 9d6091c79c2b24e714cb5214284aa1ebf2b0b0fb] [formerly db8e5eec98ea6333c6d34d6dd7e0c5c112a217aa [formerly cd8bb2cb5c381a6c936436cd9557f273d887264f [formerly 9b2a543c6c882c29e6889169fea914caa3136a32]]]
Former-commit-id: a65562bd36582a4098179544d7f21aa8b80f6b18 [formerly f0c743ac1332a315dc2d5f6ab07819c9e2e686c5 [formerly 16371c7c46feca2993cac90d9c4a75ffb6bb264b]]
Former-commit-id: c0629ff88c1118546471b0ac71e591eb8bf57ee5 [formerly 556babe8667fedfa263936450321e787b8cb522d]
Former-commit-id: 86344e2f0b85c54f7af797613f0d63ae0836369e
This commit is contained in:
Jaifroid 2022-04-25 01:27:36 +01:00 committed by GitHub
parent 8ed9c9ecd8
commit 03128c6b58
10 changed files with 142 additions and 28 deletions

View File

@ -2,6 +2,7 @@
## In-progress release 1.9.8
* FEATURE: Experimental support for reading Zimit ZIM archives
* BUILD: Nightly builds of main app are now uploaded to a new server (with sftp instead of ssh)
* BUILD: Deploy PWA image on a k8s cluster
* BUILD: PWA images are now uploaded to ghcr.io instead of Docker Hub

View File

@ -1,7 +1,7 @@
{
"name": "kiwix-js-electron",
"productName": "Kiwix JS Electron",
"version": "1.9.7-E",
"version": "1.9.8-E",
"description": "Kiwix JS packaged for the Electron framework",
"main": "main.js",
"build": {
@ -106,3 +106,8 @@
}
}

View File

@ -1,7 +1,7 @@
{
"name": "kiwix_js_windows",
"productName": "Kiwix JS Windows",
"version": "1.9.5-N",
"version": "1.9.8-N",
"description": "Kiwix JS Windows for NWJS",
"main": "index.html",
"domain": "kiwix.js.windows",
@ -88,3 +88,8 @@
}
}

View File

@ -11,7 +11,7 @@ if (-Not $only32bit) {
}
$version10 = "0.62.0" # <<< value updated automatically from package.json if launched from Create-DraftRelease
$versionXP = "0.14.7"
$appBuild = "1.9.5-N" # <<< value updated automatically from package.json if launched from Create-DraftRelease
$appBuild = "1.9.8-N" # <<< value updated automatically from package.json if launched from Create-DraftRelease
# $ZIMbase = "wikipedia_en_100"
# Check that the dev has included the correct archive in this branch
$init_params = Get-Content -Raw "$PSScriptRoot\..\www\js\init.js"
@ -102,3 +102,8 @@ foreach ($build in $builds) {
}

View File

@ -29,7 +29,7 @@
* download and install a new copy; we have to hard code this here because it is needed before any other file
* is cached in APP_CACHE
*/
const appVersion = '1.9.7';
const appVersion = '1.9.8';
/**
* The name of the Cache API cache in which assets defined in regexpCachedContentTypes will be stored

View File

@ -123,6 +123,7 @@
<div id="update" class="update">
<h3 style="margin-top:0;">Changes in version <span class="version">1.0</span></h3>
<ul style="padding-left: 15px;">
<li>Experimental support for reading Zimit ZIM archives</li>
<li>Nightly builds of main app are now uploaded to a new server (with sftp instead of ssh)</li>
<li>Deploy PWA image on a k8s cluster</li>
<li>PWA images are now uploaded to ghcr.io instead of Docker Hub</li>

View File

@ -3507,7 +3507,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
appstate.selectedArchive.resolveRedirect(dirEntry, readArticle);
} else {
//TESTING//
console.log("Initiating HTML load...");
console.log("Initiating HTML load of " + dirEntry.namespace + '/' + dirEntry.url + "...");
//Set startup cookie to guard against boot loop
//Cookie will signal failure until article is fully loaded
@ -3749,7 +3749,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
var cacheKey = appstate.selectedArchive._file.name + '/' + title;
cache.getItemFromCacheOrZIM(appstate.selectedArchive, cacheKey, dirEntry).then(function (content) {
console.log('SW read binary file for: ' + dirEntry.url);
console.log('SW read binary file for: ' + dirEntry.namespace + '/' + dirEntry.url);
if (/\b(css|javascript|video|vtt|webm)\b/i.test(mimetype)) {
var shortTitle = dirEntry.url.replace(/[^/]+\//g, '').substring(0, 18);
uiUtil.pollSpinner('Getting ' + shortTitle + '...');
@ -3767,6 +3767,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
'imageDisplay': imageDisplayMode,
'content': buffer
};
// Prevent running of replay system in Zimit files (it causes blocking exceptions, and we don't use it)
if (params.zimitZim && /javascript/i.test(message.mimetype) &&
/(?:chunk\.js|\bload\.js|\bsw\.js)(?:[?#]|$)/.test(message.title)) message.content = '';
if (content.buffer) {
// In Edge Legacy, we have to transfer the buffer inside an array, whereas in Chromium, this produces an error
// due to type not being transferable... (and already detached, which may be to do with storing in IndexedDB in Electron)
@ -3847,6 +3850,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// below, it will be further processed to calculate the ZIM URL from the relative path. This regex can cope with legitimate single
// quote marks (') in the URL.
params.regexpTagsWithZimUrl = /(<(?:img|script|link)\b[^>]*?\s)(?:src|href)(\s*=\s*(["']))(?![a-z][a-z0-9+.-]+:)(.+?)(?=\3|\?|#)([\s\S]*?>)/ig;
// Similar to above, but tailored for Zimit links
params.regexpZimitLinks = /(<(?:a|img|script|link|track)\b[^>]*?\s)(?:src|href)(=(["']))(?!#)(.+?)(?=\3|\?|#)([\s\S]*?>)/ig;
// Regex below tests the html of an article for active content [kiwix-js #466]
// It inspects every <script> block in the html and matches in the following cases: 1) the script loads a UI application called app.js;
// 2) the script block has inline content that does not contain "importScript()", "toggleOpenSection" or an "articleId" assignment
@ -3891,7 +3896,6 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
//TESTING
console.log("** HTML received **");
console.log("Loading stylesheets...");
params.isLandingPage = appstate.selectedArchive.landingPageUrl === dirEntry.namespace + '/' + dirEntry.url ?
true : params.isLandingPage;
@ -3930,13 +3934,44 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
var wikiLang = appstate.selectedArchive._file.name.replace(/(?:wikipedia|wikivoyage|wiktionary)_([^_]+).+/i, '$1');
var wikimediaZimFlavour = appstate.selectedArchive._file.name.replace(/_.+/, '');
}
// Check if we're dealing with a Zimit ZIM
params.zimitZim = /warc-headers/.test(Array.from(appstate.selectedArchive._file.mimeTypes));
if (params.isLandingPage && params.zimitZim) {
// Display Bootstrap alert regarding limited support
if (!params.hideActiveContentWarning) {
setTimeout(function () {
uiUtil.displayActiveContentWarning('zimit');
}, 1000);
}
params.zimitStartPage = htmlArticle.match(/window\.mainUrl\s*=\s*(['"])https?:\/\/([^\/]+)(.+?)\1/);
if (params.zimitStartPage && params.zimitStartPage[2] && params.zimitStartPage[3]) {
params.zimitPrefix = params.zimitStartPage[2];
params.zimitStartPage = dirEntry.namespace + '/' + params.zimitPrefix + params.zimitStartPage[3];
} else {
params.zimitStartPage = null;
}
if (params.zimitStartPage) {
params.isLandingPage = false;
goToArticle(params.zimitStartPage);
return;
}
}
var newBlock;
if (params.contentInjectionMode == 'jquery') {
htmlArticle = htmlArticle.replace(params.regexpTagsWithZimUrl, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
var assetZIMUrl = uiUtil.deriveZimUrlFromRelativeUrl(relAssetUrl, params.baseURL);
var regexp = params.zimitZim ? params.regexpZimitLinks : params.regexpTagsWithZimUrl;
htmlArticle = htmlArticle.replace(regexp, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
var assetZIMUrl;
newBlock = match;
if (params.zimitZim) {
assetZIMUrl = relAssetUrl.replace(/^\//i, '/' + dirEntry.namespace + '/' + params.zimitPrefix + '/');
assetZIMUrl = assetZIMUrl.replace(/^https?:\/\//i, '/' + dirEntry.namespace + '/');
}
assetZIMUrl = /^\//.test(assetZIMUrl) ? assetZIMUrl : uiUtil.deriveZimUrlFromRelativeUrl(relAssetUrl, params.baseURL);
if (/^<a\s/i.test(match)) newBlock = newBlock.replace(relAssetUrl, assetZIMUrl); // For Zimit ZIMs
// DEV: Note that deriveZimUrlFromRelativeUrl produces a *decoded* URL (and incidentally would remove any URI component
// if we had captured it). We therefore re-encode the URI with encodeURI (which does not encode forward slashes) instead
// of encodeURIComponent.
var newBlock = blockStart + 'data-kiwixurl' + equals + encodeURI(assetZIMUrl) + blockClose;
else newBlock = blockStart + 'data-kiwixurl' + equals + encodeURI(assetZIMUrl) + blockClose;
// For Wikipedia archives, hyperlink the image to the File version
if (wikiLang && /^<img/i.test(blockStart) && !/usemap=/i.test(match)) {
newBlock = '<a href="https://' + wikiLang + '.' + wikimediaZimFlavour + '.org/wiki/File:' +
@ -3951,17 +3986,35 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
htmlArticle = htmlArticle.replace(/(<(audio|video)\b(?:[^<]|<(?!\/\2))+<\/\2>)/ig, function (p0) {
return /(?:src|data-kiwixurl)\s*=\s*["']/.test(p0) ? p0 : '';
});
} else if (wikiLang) {
htmlArticle = htmlArticle.replace(params.regexpTagsWithZimUrl, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
// For Wikipedia archives, hyperlink the image to the File version
var assetZIMUrl = decodeURIComponent(relAssetUrl);
if (/^<img/i.test(blockStart) && !/usemap=/i.test(match)) {
var newBlock = '<a href="https://' + wikiLang + '.' + wikimediaZimFlavour + '.org/wiki/File:' +
assetZIMUrl.replace(/^.+\/([^/]+?\.(?:jpe?g|svg|png|gif))[^/]*$/i, '$1')
+ '" target="_blank">' + match + '</a>'
} else if (wikiLang || params.zimitZim) {
if (params.zimitZim) {
if (!params.zimitPrefix) {
params.zimitPrefix = htmlArticle.match(/link\s+rel=["']canonical["']\s+href=(['"])https?:\/\/([^\/]+)(.+?)\1/i);
params.zimitPrefix = params.zimitPrefix ? params.zimitPrefix[2] : '';
}
return newBlock || match;
});
htmlArticle = htmlArticle.replace(params.regexpZimitLinks, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
var newBlock = match;
var assetZIMUrl = relAssetUrl.replace(/^\//i, dirEntry.namespace + '/' + params.zimitPrefix + '/');
assetZIMUrl = assetZIMUrl.replace(/^https?:\/\//i, function (m0) {
var rtnVal = '';
if (/^<a\s/i.test(match)) rtnVal = '/';
return rtnVal + dirEntry.namespace + '/';
});
newBlock = newBlock.replace(relAssetUrl, assetZIMUrl);
return newBlock;
});
} else {
htmlArticle = htmlArticle.replace(params.regexpTagsWithZimUrl, function(match, blockStart, equals, quote, relAssetUrl, blockClose) {
// For Wikipedia archives, hyperlink the image to the File version
var assetZIMUrl = decodeURIComponent(relAssetUrl);
if (/^<img/i.test(blockStart) && !/usemap=/i.test(match)) {
newBlock = '<a href="https://' + wikiLang + '.' + wikimediaZimFlavour + '.org/wiki/File:' +
assetZIMUrl.replace(/^.+\/([^/]+?\.(?:jpe?g|svg|png|gif))[^/]*$/i, '$1')
+ '" target="_blank">' + match + '</a>'
}
return newBlock || match;
});
}
}
//Some documents (e.g. Ray Charles Index) can't be scrolled to the very end, as some content remains beneath the footer
@ -4065,6 +4118,11 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
htmlArticle = htmlArticle.replace(/(<[^>]+?)onclick\s*=\s*["'][^"']+["']\s*/ig, '$1');
//Neutralize href="javascript:" links
htmlArticle = htmlArticle.replace(/href\s*=\s*["']javascript:[^"']+["']/gi, 'href=""');
} else if (/journals\.openedition\.org/i.test(params.zimitPrefix)) {
// Neutralize all inline scripts, excluding math blocks or react templates, as they cause a loop on loading article
htmlArticle = htmlArticle.replace(/<(script\b(?![^>]+type\s*=\s*["'](?:math\/|text\/html|[^"']*?math))(?:[^<]|<(?!\/script>))+<\/script)>/ig, function (p0, p1) {
return '<!-- ' + p1 + ' --!>';
});
}
//MathJax detection:
@ -4171,6 +4229,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
//Preload stylesheets [kiwix-js #149]
console.log("Loading stylesheets...");
//Set up blobArray of promises
var prefix = (window.location.protocol + '//' + window.location.host + window.location.pathname).replace(/\/[^/]*$/, '');
var cssArray = htmlArticle.match(regexpSheetHref);
@ -4741,8 +4800,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
} else if (anchorTarget) {
// It's a local anchor link : remove escapedUrl if any (see above)
anchor.setAttribute('href', '#' + anchorTarget[1]);
} else if (anchor.protocol !== currentProtocol ||
anchor.host !== currentHost) {
} else if (!(params.zimitZim && ~href.indexOf(params.zimitPrefix)) && (anchor.protocol !== currentProtocol ||
anchor.host !== currentHost)) {
// It's an external URL : we should open it in a new tab
anchor.target = '_blank';
if (anchor.protocol === 'bingmaps:') {
@ -4754,6 +4813,11 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
} else {
// It's a link to an article or file in the ZIM
// if (params.zimitZim) {
// // Change absolute link to ZIM link
// var testZimitAnchor = new RegExp('^https?://' + params.zimitPrefix);
// href = href.replace(testZimitAnchor, dirEntry.namespace + '/' + params.zimitPrefix);
// }
addListenersToLink(anchor, href, params.baseURL);
}
});
@ -4773,6 +4837,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
*/
function addListenersToLink(a, href, baseUrl) {
var uriComponent = uiUtil.removeUrlParameters(href);
var namespace = baseUrl.replace(/^([-ABCIJMUVWX])\/.+/, '$1');
var loadingContainer = false;
var contentType;
var downloadAttrValue;
@ -4834,7 +4899,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
e.stopPropagation();
anchorParameter = href.match(/#([^#;]+)$/);
anchorParameter = anchorParameter ? anchorParameter[1] : '';
var zimUrl = uiUtil.deriveZimUrlFromRelativeUrl(uriComponent, baseUrl);
var zimUrl;
// Patch Zimit support
// if (params.zimitZim && params.contentInjectionMode === 'serviceworker') {
// zimUrl = decodeURIComponent(uriComponent);
// } else {
zimUrl = uiUtil.deriveZimUrlFromRelativeUrl(uriComponent, baseUrl);
// }
// if (params.zimitZim && !~zimUrl.indexOf(params.zimitPrefix)) zimUrl = namespace + '/' + params.zimitPrefix + '/' + zimUrl;
goToArticle(zimUrl, downloadAttrValue, contentType);
setTimeout(reset, 1400);
};
@ -5143,7 +5215,17 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
if (dirEntry === null || dirEntry === undefined) {
uiUtil.clearSpinner();
console.error("Article with title " + path + " not found in the archive");
goToMainArticle();
if (params.zimitZim) {
path = path.replace(/^[AC]\//, 'http://');
uiUtil.systemAlert('<p>We could not find an offline version of the requested article in this Zimit archive.</p>' +
'<p>If you would like to open this page online in a new tab, please click this link:</p>' +
'<p><a href="' + path + '" target="_blank">' + path + '</a></p>');
setTab();
} else {
uiUtil.systemAlert('<p>We could not find the article ' + path + ' in this archive!</p>' +
'<p>Redirecting to landing page...</p>');
goToMainArticle();
}
} else if (download) {
appstate.selectedArchive.readBinaryFile(dirEntry, function (fileDirEntry, content) {
var mimetype = contentType || fileDirEntry.getMimetype();

View File

@ -50,7 +50,7 @@ var params = {};
*/
var appstate = {};
/******** UPDATE VERSION IN service-worker.js TO MATCH VERSION AND CHECK PWASERVER BELOW!!!!!!! *******/
params['appVersion'] = "1.9.7"; //DEV: Manually update this version when there is a new release: it is compared to the Settings Store "appVersion" in order to show first-time info, and the cookie is updated in app.js
params['appVersion'] = "1.9.8"; //DEV: Manually update this version when there is a new release: it is compared to the Settings Store "appVersion" in order to show first-time info, and the cookie is updated in app.js
/******* UPDATE THIS ^^^^^^ IN service worker AND PWA-SERVER BELOW !! ********************/
params['packagedFile'] = getSetting('packagedFile') || "wikipedia_en_climate_change_mini_2022-03.zim"; //For packaged Kiwix JS (e.g. with Wikivoyage file), set this to the filename (for split files, give the first chunk *.zimaa) and place file(s) in default storage
params['archivePath'] = "archives"; //The directory containing the packaged archive(s) (relative to app's root directory)

View File

@ -93,6 +93,7 @@ define(['uiUtil'], function (uiUtil) {
});
return;
}
if (params.zimitZim) title = title.replace(/^\//, '');
appstate.selectedArchive.getDirEntryByPath(title).then(function (dirEntry) {
return appstate.selectedArchive.readBinaryFile(dirEntry, function (fileDirEntry, content) {
image.style.background = '';

View File

@ -324,14 +324,14 @@ define(rqDef, function(util) {
/**
* Displays a Bootstrap warning alert with information about how to access content in a ZIM with unsupported active UI
*/
function displayActiveContentWarning() {
function displayActiveContentWarning(type) {
// We have to add the alert box in code, because Bootstrap removes it completely from the DOM when the user dismisses it
var alertHTML =
'<div id="activeContent" class="alert alert-warning alert-dismissible fade in" style="margin-bottom: 0;">' +
'<a href="#" class="close" data-dismiss="alert" aria-label="close">&times;</a>' +
'<strong>Unable to display active content:</strong> To use Archive Index <b><i>type a space</i></b> in the box above, or else ' +
'<a id="swModeLink" href="#contentInjectionModeDiv" class="alert-link">switch to Service Worker mode</a> ' +
'if your platform supports it. &nbsp;[<a id="stop" href="#otherSettingsDiv" class="alert-link">Permanently hide</a>]' +
'if your platform supports it. &nbsp;[<a id="stop" href="#expertSettingsDiv" class="alert-link">Permanently hide</a>]' +
'</div>';
if (params.contentInjectionMode === 'serviceworker' && (params.manipulateImages || params.displayHiddenBlockElements || params.allowHTMLExtraction)) {
alertHTML =
@ -342,7 +342,21 @@ define(rqDef, function(util) {
params.manipulateImages ? '<a id="imModeLink" href="#imageManipulationDiv" class="alert-link">disable Image manipulation</a> ' : '') +
(params.allowHTMLExtraction ? (params.displayHiddenBlockElements || params.manipulateImages ? 'and ' : '') +
'disable Breakout link ' : '') + 'for this content to work properly. To use Archive Index <b><i>type a space</i></b> ' +
'in the box above.&nbsp;[<a id="stop" href="#otherSettingsDiv" class="alert-link">Permanently hide</a>]' +
'in the box above.&nbsp;[<a id="stop" href="#expertSettingsDiv" class="alert-link">Permanently hide</a>]' +
'</div>';
}
if (type === 'zimit') {
alertHTML =
'<div id="activeContent" class="alert alert-warning alert-dismissible fade in" style="margin-bottom: 0;">' +
'<a href="#" class="close" data-dismiss="alert" aria-label="close">&times;</a>' +
'<strong>' + (params.contentInjectionMode === 'jquery' ? 'Limited Zimit' : 'Experimental') + ' support:</strong> ' +
(params.contentInjectionMode === 'jquery' ? 'Please <a id="swModeLink" href="#contentInjectionModeDiv" ' +
'class="alert-link">switch to Service Worker mode</a> if your platform supports it. ' :
'Please note that this app only has experimental support for <b>Zimit</b> ZIMs, and ' +
'in particular it may not run all active content. Audio and video may not work yet. ') +
'<br />Search for your content in the search box above (start your search with .* to match part of a title)' +
', or to use the Archive Index <b><i>type a space</i></b> in the box above.&nbsp;' +
'[<a id="stop" href="#expertSettingsDiv" class="alert-link">Permanently hide</a>]' +
'</div>';
}
var alertBoxHeader = document.getElementById('alertBoxHeader');