mirror of
https://github.com/kiwix/kiwix-js-pwa.git
synced 2025-09-09 12:19:46 -04:00
Backport changes from Kiwix JS
This commit is contained in:
parent
8d50d1714c
commit
d839f29a7d
@ -1031,7 +1031,7 @@ document.getElementById('btnConfigure').addEventListener('click', function () {
|
||||
setTab();
|
||||
if (params.themeChanged) {
|
||||
params.themeChanged = false;
|
||||
var archiveName = appstate.selectedArchive ? appstate.selectedArchive._file.name : null;
|
||||
var archiveName = appstate.selectedArchive ? appstate.selectedArchive.file.name : null;
|
||||
if (archiveName && ~params.lastPageVisit.indexOf(archiveName)) {
|
||||
goToArticle(params.lastPageVisit.replace(/@kiwixKey@.+$/, ''));
|
||||
}
|
||||
@ -1775,7 +1775,7 @@ document.getElementById('manipulateImagesCheck').addEventListener('click', funct
|
||||
} else if (window.nw) {
|
||||
uiUtil.systemAlert('Unfortunately there is currently no way to save an image to disk in the NWJS version of this app.<br>You can do this in the PWA version: please visit https://pwa.kiwix.org.');
|
||||
} else if (params.contentInjectionMode === 'serviceworker' && appstate.selectedArchive &&
|
||||
!/wikipedia|wikivoyage|mdwiki|wiktionary/i.test(appstate.selectedArchive._file.name)) {
|
||||
!/wikipedia|wikivoyage|mdwiki|wiktionary/i.test(appstate.selectedArchive.file.name)) {
|
||||
uiUtil.systemAlert('Please be aware that Image manipulation can interfere with non-Wikimedia ZIMs (particularly ZIMs that have active content). If you cannot access the articles in such a ZIM, please turn this setting off.');
|
||||
} else if (/PWA/.test(params.appType) && params.contentInjectionMode === 'jquery') {
|
||||
uiUtil.systemAlert('Be aware that this option may interfere with active content if you switch to Service Worker mode.');
|
||||
@ -3381,7 +3381,7 @@ function setLocalArchiveFromArchiveList (archive) {
|
||||
readNodeDirectoryAndCreateNodeFileObjects(params.pickedFolder, archive)
|
||||
.then(function (fileset) {
|
||||
var selectedFiles = fileset[0];
|
||||
if (appstate.selectedArchive && appstate.selectedArchive._file._files[0].name === selectedFiles[0].name) {
|
||||
if (appstate.selectedArchive && appstate.selectedArchive.file._files[0].name === selectedFiles[0].name) {
|
||||
document.getElementById('btnHome').click();
|
||||
} else {
|
||||
setLocalArchiveFromFileList(selectedFiles);
|
||||
@ -3947,18 +3947,18 @@ function archiveReadyCallback (archive) {
|
||||
// Ensure that the new ZIM output is initially sent to the iframe (e.g. if the last article was loaded in a window)
|
||||
// (this only affects jQuery mode)
|
||||
appstate.target = 'iframe';
|
||||
appstate.wikimediaZimLoaded = /wikipedia|wikivoyage|mdwiki|wiktionary/i.test(archive._file.name);
|
||||
appstate.wikimediaZimLoaded = /wikipedia|wikivoyage|mdwiki|wiktionary/i.test(archive.file.name);
|
||||
appstate.pureMode = false;
|
||||
// These ZIM types have so much dynamic content that we have to allow all images
|
||||
if (params.imageDisplay && (/gutenberg|phet/i.test(archive._file.name) ||
|
||||
if (params.imageDisplay && (/gutenberg|phet/i.test(archive.file.name) ||
|
||||
// params.isLandingPage ||
|
||||
/kolibri/i.test(archive._file.creator) ||
|
||||
/kolibri/i.test(archive.creator) ||
|
||||
params.zimType === 'zimit')) {
|
||||
params.imageDisplayMode = 'all';
|
||||
if (params.zimType !== 'zimit') {
|
||||
// For some archive types (Gutenberg, PhET, Kolibri at least), we have to get out of the way and allow the Service Worker
|
||||
// to act as a transparent passthrough (this key will be read in the handleMessageChannelMessage function)
|
||||
console.debug('*** Activating pureMode for ZIM: ' + archive._file.name + ' ***');
|
||||
console.debug('*** Activating pureMode for ZIM: ' + archive.file.name + ' ***');
|
||||
appstate.pureMode = true;
|
||||
}
|
||||
}
|
||||
@ -3992,8 +3992,8 @@ function archiveReadyCallback (archive) {
|
||||
}
|
||||
}
|
||||
// The archive is set : go back to home page to start searching
|
||||
params.storedFile = archive._file._files[0].name;
|
||||
params.storedFilePath = archive._file._files[0].path ? archive._file._files[0].path : '';
|
||||
params.storedFile = archive.file._files[0].name;
|
||||
params.storedFilePath = archive.file._files[0].path ? archive.file._files[0].path : '';
|
||||
settingsStore.setItem('lastSelectedArchive', params.storedFile, Infinity);
|
||||
settingsStore.setItem('lastSelectedArchivePath', params.storedFilePath, Infinity);
|
||||
if (!~params.lastPageVisit.indexOf(params.storedFile.replace(/\.zim(\w\w)?$/, ''))) {
|
||||
@ -4648,7 +4648,7 @@ function readArticle (dirEntry) {
|
||||
uiUtil.clearSpinner();
|
||||
});
|
||||
} else if (params.contentInjectionMode === 'serviceworker') {
|
||||
articleContainer = window.open('../' + appstate.selectedArchive._file.name + '/' + dirEntry.namespace + '/' + encodeURIComponent(dirEntry.url),
|
||||
articleContainer = window.open('../' + appstate.selectedArchive.file.name + '/' + dirEntry.namespace + '/' + encodeURIComponent(dirEntry.url),
|
||||
params.windowOpener === 'tab' ? '_blank' : encodeURIComponent(dirEntry.title | mimeType),
|
||||
params.windowOpener === 'window' ? 'toolbar=0,location=0,menubar=0,width=800,height=600,resizable=1,scrollbars=1' : null);
|
||||
appstate.target = 'window';
|
||||
@ -4660,7 +4660,7 @@ function readArticle (dirEntry) {
|
||||
}
|
||||
// Load cached start page if it exists and we have loaded the packaged file
|
||||
var htmlContent = 0;
|
||||
var zimName = appstate.selectedArchive._file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, '');
|
||||
var zimName = appstate.selectedArchive.file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, '');
|
||||
if (params.isLandingPage && params.cachedStartPages[zimName]) {
|
||||
htmlContent = -1;
|
||||
// @TODO: Why are we double-encoding here????? Clearly we double-decode somewhere...
|
||||
@ -4777,7 +4777,7 @@ var loaded = false;
|
||||
var articleLoadedSW = function (dirEntry) {
|
||||
if (loaded) return;
|
||||
loaded = true;
|
||||
params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive._file.name;
|
||||
params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive.file.name;
|
||||
articleDocument = articleWindow.document.documentElement;
|
||||
var doc = articleWindow.document;
|
||||
var docBody = doc.body;
|
||||
@ -4812,7 +4812,7 @@ var articleLoadedSW = function (dirEntry) {
|
||||
listenForNavigationKeys();
|
||||
// We need to keep tabs on the opened tabs or windows if the user wants right-click functionality, and also parse download links
|
||||
// We need to set a timeout so that dynamically generated URLs are parsed as well (e.g. in Gutenberg ZIMs)
|
||||
if (params.windowOpener) {
|
||||
if (params.windowOpener && !appstate.pureMode) {
|
||||
setTimeout(function () {
|
||||
parseAnchorsJQuery(dirEntry);
|
||||
}, 1500);
|
||||
@ -4973,7 +4973,7 @@ function handleMessageChannelMessage (event) {
|
||||
} else {
|
||||
loadingArticle = '';
|
||||
}
|
||||
var cacheKey = appstate.selectedArchive._file.name + '/' + title;
|
||||
var cacheKey = appstate.selectedArchive.file.name + '/' + title;
|
||||
cache.getItemFromCacheOrZIM(appstate.selectedArchive, cacheKey, dirEntry).then(function (content) {
|
||||
console.debug('SW read binary file for: ' + dirEntry.namespace + '/' + dirEntry.url);
|
||||
if (params.zimType === 'zimit' && loadingArticle) {
|
||||
@ -5051,7 +5051,7 @@ function postTransformedHTML (thisMessage, thisMessagePort, thisDirEntry) {
|
||||
if (/UWP/.test(params.appType) && (appstate.target === 'window' || appstate.messageChannelWaiting) &&
|
||||
params.imageDisplay) { thisMessage.imageDisplay = 'all'; }
|
||||
// We need to do the same for Gutenberg and PHET ZIMs
|
||||
if (params.imageDisplay && (/gutenberg|phet/i.test(appstate.selectedArchive._file.name)
|
||||
if (params.imageDisplay && (/gutenberg|phet/i.test(appstate.selectedArchive.file.name)
|
||||
// || params.isLandingPage
|
||||
)) {
|
||||
thisMessage.imageDisplay = 'all';
|
||||
@ -5203,21 +5203,21 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) {
|
||||
// Since page has been successfully loaded, store it in the browser history
|
||||
if (params.contentInjectionMode === 'jquery') pushBrowserHistoryState(dirEntry.namespace + '/' + dirEntry.url);
|
||||
// Store for fast retrieval
|
||||
params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive._file.name;
|
||||
params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive.file.name;
|
||||
if (params.rememberLastPage) settingsStore.setItem('lastPageVisit', params.lastPageVisit, Infinity);
|
||||
cache.setArticle(appstate.selectedArchive._file.name, dirEntry.namespace + '/' + dirEntry.url, htmlArticle, function () {});
|
||||
cache.setArticle(appstate.selectedArchive.file.name, dirEntry.namespace + '/' + dirEntry.url, htmlArticle, function () {});
|
||||
params.htmlArticle = htmlArticle;
|
||||
|
||||
// Replaces ZIM-style URLs of img, script, link and media tags with a data-kiwixurl to prevent 404 errors [kiwix-js #272 #376]
|
||||
// This replacement also processes the URL relative to the page's ZIM URL so that we can find the ZIM URL of the asset
|
||||
// with the correct namespace (this works for old-style -,I,J namespaces and for new-style C namespace)
|
||||
if (params.linkToWikimediaImageFile && !params.isLandingPage && /(?:wikipedia|wikivoyage|wiktionary|mdwiki)_/i.test(appstate.selectedArchive._file.name)) {
|
||||
var wikiLang = appstate.selectedArchive._file.name.replace(/(?:wikipedia|wikivoyage|wiktionary|mdwiki)_([^_]+).+/i, '$1');
|
||||
var wikimediaZimFlavour = appstate.selectedArchive._file.name.replace(/_.+/, '');
|
||||
if (params.linkToWikimediaImageFile && !params.isLandingPage && /(?:wikipedia|wikivoyage|wiktionary|mdwiki)_/i.test(appstate.selectedArchive.file.name)) {
|
||||
var wikiLang = appstate.selectedArchive.file.name.replace(/(?:wikipedia|wikivoyage|wiktionary|mdwiki)_([^_]+).+/i, '$1');
|
||||
var wikimediaZimFlavour = appstate.selectedArchive.file.name.replace(/_.+/, '');
|
||||
}
|
||||
var newBlock;
|
||||
var assetZIMUrlEnc;
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/';
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/';
|
||||
if (params.contentInjectionMode == 'jquery') {
|
||||
htmlArticle = htmlArticle.replace(params.regexpTagsWithZimUrl, function (match, blockStart, equals, quote, relAssetUrl, blockClose) {
|
||||
// Don't process data URIs (yet)
|
||||
@ -5284,7 +5284,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) {
|
||||
// @TODO - remove when fixed on mw-offliner: dirty patch for removing extraneous tags in ids
|
||||
htmlArticle = htmlArticle.replace(/(\bid\s*=\s*"[^\s}]+)\s*\}[^"]*/g, '$1');
|
||||
// @TODO - remove when fixed in MDwiki ZIM: dirty patch for removing erroneously hard-coded style
|
||||
if (/^mdwiki/.test(appstate.selectedArchive._file.name)) htmlArticle = htmlArticle.replace(/(class=['"]thumbinner[^>]+style=['"]width\s*:\s*)\d+px/ig, '$1320px');
|
||||
if (/^mdwiki/.test(appstate.selectedArchive.file.name)) htmlArticle = htmlArticle.replace(/(class=['"]thumbinner[^>]+style=['"]width\s*:\s*)\d+px/ig, '$1320px');
|
||||
// Remove landing page scripts that don't work in SW mode
|
||||
htmlArticle = htmlArticle.replace(/<script\b[^>]+-\/[^>]*((?:images_loaded|masonry)\.min|article_list_home)\.js"[^<]*<\/script>/gi, '');
|
||||
// Set max-width for infoboxes (now set in -/s/styles.css)
|
||||
@ -5399,7 +5399,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) {
|
||||
? false : params.useMathJax;
|
||||
// Detect raw MathML on page for certain ZIMs that are expected to have it
|
||||
params.containsMathTexRaw = params.useMathJax &&
|
||||
/stackexchange|askubuntu|superuser|stackoverflow|mathoverflow|serverfault|stackapps|proofwiki/i.test(appstate.selectedArchive._file.name)
|
||||
/stackexchange|askubuntu|superuser|stackoverflow|mathoverflow|serverfault|stackapps|proofwiki/i.test(appstate.selectedArchive.file.name)
|
||||
? /[^\\](\$\$?)((?:\\\$|(?!\1)[\s\S])+)\1/.test(htmlArticle) : false;
|
||||
|
||||
// if (params.containsMathTexRaw) {
|
||||
@ -5572,7 +5572,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) {
|
||||
blobArray.push([title, cssBlobCache.get(title)]);
|
||||
injectCSS();
|
||||
} else {
|
||||
var cacheKey = appstate.selectedArchive._file.name + '/' + title;
|
||||
var cacheKey = appstate.selectedArchive.file.name + '/' + title;
|
||||
cache.getItemFromCacheOrZIM(appstate.selectedArchive, cacheKey).then(function (content) {
|
||||
// DEV: Uncomment line below and break on next to capture cssContent for local filesystem cache
|
||||
// var cssContent = util.uintToString(content);
|
||||
@ -5884,7 +5884,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) {
|
||||
// If the request was not initiated by an existing controlled window, we instantiate the request here
|
||||
if (!appstate.messageChannelWaiting) {
|
||||
// We put the ZIM filename as a prefix in the URL, so that browser caches are separate for each ZIM file
|
||||
var newLocation = '../' + appstate.selectedArchive._file.name + '/' + dirEntry.namespace + '/' + encodedUrl;
|
||||
var newLocation = '../' + appstate.selectedArchive.file.name + '/' + dirEntry.namespace + '/' + encodedUrl;
|
||||
if (navigator.serviceWorker.controller) {
|
||||
loaded = false;
|
||||
articleWindow.location.href = newLocation;
|
||||
@ -6053,7 +6053,7 @@ function addListenersToLink (a, href, baseUrl) {
|
||||
e.stopPropagation();
|
||||
anchorParameter = href.match(/#([^#;]+)$/);
|
||||
anchorParameter = anchorParameter ? anchorParameter[1] : '';
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/';
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/';
|
||||
var zimRoot = indexRoot.replace(/^.+?\/www\//, '/');
|
||||
var zimUrl;
|
||||
var zimUrlFullEncoding;
|
||||
@ -6398,7 +6398,7 @@ function goToArticle (path, download, contentType, pathEnc) {
|
||||
clearFindInArticle();
|
||||
var shortTitle = path.replace(/[^/]+\//g, '').substring(0, 18);
|
||||
uiUtil.pollSpinner('Loading ' + shortTitle);
|
||||
var zimName = appstate.selectedArchive._file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, '');
|
||||
var zimName = appstate.selectedArchive.file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, '');
|
||||
if (~path.indexOf(params.cachedStartPages[zimName])) {
|
||||
goToMainArticle();
|
||||
return;
|
||||
@ -6426,7 +6426,7 @@ function goToArticle (path, download, contentType, pathEnc) {
|
||||
} else if (download || /\/(epub|pdf|zip|.*opendocument|.*officedocument|tiff|mp4|webm|mpeg|octet-stream)\b/i.test(mimetype)) {
|
||||
// PDFs can be treated as a special case, as they can be displayed directly in a browser window or tab in most browsers (but not UWP)
|
||||
if (!/UWP/.test(params.appType) && params.contentInjectionMode === 'serviceworker' && (/\/pdf\b/.test(mimetype) || /\.pdf([?#]|$)/i.test(dirEntry.url))) {
|
||||
window.open(document.location.pathname.replace(/[^/]+$/, '') + appstate.selectedArchive._file.name + '/' + pathForServiceWorker,
|
||||
window.open(document.location.pathname.replace(/[^/]+$/, '') + appstate.selectedArchive.file.name + '/' + pathForServiceWorker,
|
||||
params.windowOpener === 'tab' ? '_blank' : 'Download PDF',
|
||||
params.windowOpener === 'window' ? 'toolbar=0,location=0,menubar=0,width=800,height=600,resizable=1,scrollbars=1' : null);
|
||||
} else {
|
||||
@ -6462,7 +6462,7 @@ function goToRandomArticle () {
|
||||
// We fall back to the old A namespace to support old ZIM files without a text/html MIME type for articles
|
||||
// DEV: If minorVersion is 1, then we are using a v1 article-only title listing. By definition,
|
||||
// all dirEntries in an article-only listing must be articles.
|
||||
if (appstate.selectedArchive._file.minorVersion === 1 || /text\/html\b/i.test(dirEntry.getMimetype()) ||
|
||||
if (appstate.selectedArchive.file.minorVersion === 1 || /text\/html\b/i.test(dirEntry.getMimetype()) ||
|
||||
params.zimType !== 'zimit' && dirEntry.namespace === 'A') {
|
||||
params.isLandingPage = false;
|
||||
alertBoxHeader.style.display = 'none';
|
||||
|
@ -105,7 +105,7 @@ function extractImages (images, callback) {
|
||||
return;
|
||||
}
|
||||
// Zimit files (at least) will sometimes have a ZIM prefix, but we are extracting raw here
|
||||
title = title.replace(appstate.selectedArchive._file.name + '/', '');
|
||||
title = title.replace(appstate.selectedArchive.file.name + '/', '');
|
||||
// Zimit files store URLs encoded!
|
||||
if (params.zimType === 'zimit') title = encodeURI(title);
|
||||
appstate.selectedArchive.getDirEntryByPath(title).then(function (dirEntry) {
|
||||
@ -242,7 +242,7 @@ function prepareImagesServiceWorker (win, forPrinting) {
|
||||
}, 1000);
|
||||
if (!forPrinting && !documentImages.length) return;
|
||||
var imageHtml;
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/';
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/';
|
||||
for (var i = 0, l = documentImages.length; i < l; i++) {
|
||||
// Process Wikimedia MathML, but not if we'll be using the jQuery routine later
|
||||
if (!(params.manipulateImages || params.allowHTMLExtraction)) {
|
||||
@ -312,7 +312,7 @@ function prepareImagesJQuery (win, forPrinting) {
|
||||
container = win;
|
||||
var doc = container.document;
|
||||
var documentImages = doc.querySelectorAll('img[data-kiwixurl], video, audio');
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/';
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/';
|
||||
indexRoot = indexRoot.replace(/^\//, '');
|
||||
// Zimit ZIMs work better if all images are extracted
|
||||
if (params.zimType === 'zimit') forPrinting = true;
|
||||
@ -331,7 +331,7 @@ function prepareImagesJQuery (win, forPrinting) {
|
||||
image.style.opacity = '0';
|
||||
// Set a minimum width to avoid some images not rendering in squashed hidden tables
|
||||
if (params.displayHiddenBlockElements && image.width && !image.style.minWidth &&
|
||||
/wiki|wiktionary/i.test(appstate.selectedArchive._file.name)) {
|
||||
/wiki|wiktionary/i.test(appstate.selectedArchive.file.name)) {
|
||||
var imgX = image.width + '';
|
||||
imgX = imgX.replace(/(\d+)$/, '$1px');
|
||||
image.style.minWidth = imgX;
|
||||
|
@ -131,7 +131,7 @@ function transformReplayUrls (dirEntry, data, mimetype, callback) {
|
||||
* Note that some Zimit ZIMs have mimetypes like 'text/html;raw=true', so we can't simply match 'text/html'
|
||||
* Other ZIMs have a mimetype like 'html' (with no 'text/'), so we have to match as generically as possible
|
||||
*/
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name);
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name);
|
||||
if (/\bx?html\b/i.test(mimetype)) {
|
||||
var zimitPrefix = data.match(regexpGetZimitPrefix);
|
||||
// If the URL is the same as the URL with everything after the first / removed, then we are in the root directory
|
||||
@ -320,7 +320,7 @@ function transformVideoUrl (url, articleDocument, callback) {
|
||||
console.debug('TRANSFORMED VIDEO URL ' + pureUrl + ' --> \n' + transUrl);
|
||||
// If we are dealing with embedded video, we have to find the embedded URL and substitute it
|
||||
if (/\/embed\//i.test(pureUrl)) {
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name);
|
||||
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name);
|
||||
Array.prototype.slice.call(articleDocument.querySelectorAll('iframe')).forEach(function (frame) {
|
||||
if (~frame.src.indexOf(videoId)) {
|
||||
var newUrl = window.location.origin + indexRoot + transUrl.replace(/videoembed/, '');
|
||||
|
@ -399,7 +399,7 @@ function displayActiveContentWarning (type) {
|
||||
(params.contentInjectionMode === 'jquery' ? '<b>Limited Zimit support!</b> Please <a id="swModeLink" href="#contentInjectionModeDiv" ' +
|
||||
'class="alert-link">switch to Service Worker mode</a> if your platform supports it. '
|
||||
: 'Support for <b>Zimit</b> archives is experimental. Some content (e.g. audio/video) may fail. ') +
|
||||
'You can search for content above' + (appstate.selectedArchive._file.fullTextIndex ? ' using full-text search if your app supports it, ' +
|
||||
'You can search for content above' + (appstate.selectedArchive.file.fullTextIndex ? ' using full-text search if your app supports it, ' +
|
||||
'or s' : '. S') + 'tart your search with <b>.*</b> to match part of a title. Type a <b><i>space</i></b> for the ZIM Archive Index, or ' +
|
||||
'<b><i>space / </i></b> for the URL Index. [<a id="stop" href="#expertSettingsDiv" class="alert-link">Permanently hide</a>]' +
|
||||
'</div>';
|
||||
|
@ -1,22 +1,22 @@
|
||||
/**
|
||||
* zimArchive.js: Support for archives in ZIM format.
|
||||
*
|
||||
* Copyright 2015 Mossroy and contributors
|
||||
* License GPL v3:
|
||||
* Copyright 2015-2023 Mossroy, Jaifroid and contributors
|
||||
* Licence GPL v3:
|
||||
*
|
||||
* This file is part of Kiwix.
|
||||
*
|
||||
* Kiwix is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* it under the terms of the GNU General Public Licence as published by
|
||||
* the Free Software Foundation, either version 3 of the Licence, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Kiwix is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
* GNU General Public Licence for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* You should have received a copy of the GNU General Public Licence
|
||||
* along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
|
||||
@ -34,10 +34,17 @@ import utf8 from './utf8.js';
|
||||
/**
|
||||
* ZIM Archive
|
||||
*
|
||||
*
|
||||
* @typedef ZIMArchive
|
||||
* @property {ZIMFile} _file The ZIM file (instance of ZIMFile, that might physically be split into several actual files)
|
||||
* @property {String} _language Language of the content
|
||||
* @property {ZIMFile} file The ZIM file (instance of ZIMFile, that might physically be split into several actual _files)
|
||||
* @property {String} counter Counter of various types of content in the archive
|
||||
* @property {String} creator Creator of the content
|
||||
* @property {String} date Date of the creation of the archive
|
||||
* @property {String} description Description of the content
|
||||
* @property {String} language Language of the content
|
||||
* @property {String} name Name of the archive
|
||||
* @property {String} publisher Publisher of the content
|
||||
* @property {String} title Title of the content
|
||||
* @property {String} zimType Extended property: currently either 'open' for OpenZIM file type, or 'zimit' for the warc2zim file type used by Zimit
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -66,17 +73,16 @@ var LZ;
|
||||
*/
|
||||
function ZIMArchive (storage, path, callbackReady, callbackError) {
|
||||
var that = this;
|
||||
that._file = null;
|
||||
that._language = ''; // @TODO
|
||||
that.file = null;
|
||||
var createZimfile = function (fileArray) {
|
||||
zimfile.fromFileArray(fileArray).then(function (file) {
|
||||
that._file = file;
|
||||
that.file = file;
|
||||
// Clear the previous libzimWorker
|
||||
LZ = null;
|
||||
// Set a global parameter to report the search provider type
|
||||
params.searchProvider = 'title';
|
||||
// File has been created, but we need to add any Listings which extend the archive metadata
|
||||
that._file.setListings([
|
||||
that.file.setListings([
|
||||
// Provide here any Listings for which we need to extract metadata as key:value objects to be added to the file
|
||||
// 'ptrName' and 'countName' contain the key names to be set in the archive file object
|
||||
{
|
||||
@ -100,16 +106,16 @@ function ZIMArchive (storage, path, callbackReady, callbackError) {
|
||||
}
|
||||
]).then(function () {
|
||||
// There is currently an exception thrown in the libzim wasm if we attempt to load a split ZIM archive, so we work around
|
||||
var isSplitZim = /\.zima.$/i.test(that._file._files[0].name);
|
||||
var isSplitZim = /\.zima.$/i.test(that.file._files[0].name);
|
||||
var libzimReaderType = params.debugLibzimASM || ('WebAssembly' in self ? 'wasm' : 'asm');
|
||||
if (that._file.fullTextIndex && params.debugLibzimASM !== 'disable' && (params.debugLibzimASM || !isSplitZim &&
|
||||
if (that.file.fullTextIndex && params.debugLibzimASM !== 'disable' && (params.debugLibzimASM || !isSplitZim &&
|
||||
// The ASM implementation requires Atomics support, whereas the WASM implementation does not
|
||||
(typeof Atomics !== 'undefined' || libzimReaderType === 'wasm') &&
|
||||
// Note that NWJS currently throws due to problems with Web Worker context, and Android is very slow unless we use OPFS
|
||||
!(/Android/.test(params.appType) && !params.useOPFS) && !(window.nw && that._file._files[0].readMode === 'electron'))) {
|
||||
!(/Android/.test(params.appType) && !params.useOPFS) && !(window.nw && that.file._files[0].readMode === 'electron'))) {
|
||||
console.log('Instantiating libzim ' + libzimReaderType + ' Web Worker...');
|
||||
LZ = new Worker('js/lib/libzim-' + libzimReaderType + '.js');
|
||||
that.callLibzimWorker({ action: 'init', files: that._file._files }).then(function (msg) {
|
||||
that.callLibzimWorker({ action: 'init', files: that.file._files }).then(function (msg) {
|
||||
// console.debug(msg);
|
||||
params.searchProvider = 'fulltext: ' + libzimReaderType;
|
||||
// Update the API panel
|
||||
@ -120,7 +126,7 @@ function ZIMArchive (storage, path, callbackReady, callbackError) {
|
||||
});
|
||||
} else {
|
||||
// var message = 'Full text searching is not available because ';
|
||||
if (!that._file.fullTextIndex) {
|
||||
if (!that.file.fullTextIndex) {
|
||||
params.searchProvider += ': no_fulltext'; // message += 'this ZIM does not have a full-text index.';
|
||||
} else if (isSplitZim) {
|
||||
params.searchProvider += ': split_zim'; // message += 'the ZIM archive is split.';
|
||||
@ -134,24 +140,35 @@ function ZIMArchive (storage, path, callbackReady, callbackError) {
|
||||
params.searchProvider += ': unknown';
|
||||
}
|
||||
uiUtil.reportSearchProviderToAPIStatusPanel(params.searchProvider);
|
||||
// uiUtil.systemAlert(message);
|
||||
}
|
||||
// Set the archive file type ('open' or 'zimit')
|
||||
params.zimType = that.setZimType();
|
||||
// var thisCallbackReady = callbackReady;
|
||||
// Add any metadata from the M/ namespace that you need access to here
|
||||
// Add time-critical metadata from the M/ namespace that you need early access to here
|
||||
// Note that adding metadata here delays the reporting of the ZIM archive as ready
|
||||
// Further metadata are added in the background below, and can be accessed later
|
||||
Promise.all([
|
||||
that.addMetadataToZIMFile('Creator'),
|
||||
that.addMetadataToZIMFile('Name')
|
||||
that.addMetadataToZIMFile('Language')
|
||||
]).then(function () {
|
||||
// If the archive name doesn't end in `.zim`, we add it to the metadata
|
||||
that._file.name = that._file.name.replace(/\.zim\s*$/i, '') + '.zim';
|
||||
console.debug('ZIMArchive ready, metadata will be added in the background');
|
||||
// All listings should be loaded, so we can now call the callback
|
||||
callbackReady(that);
|
||||
});
|
||||
// DEV: Currently, extended listings are only used for title (=article) listings when the user searches
|
||||
// for an article or uses the Random button, by which time the listings will have been extracted.
|
||||
// If, in the future, listings are used in a more time-critical manner, consider forcing a wait before
|
||||
// declaring the archive to be ready, by chaining the following callback in a .then() function of setListings.
|
||||
// Add non-time-critical metadata to archive in background so as not to delay opening of the archive
|
||||
// DEV: Note that it does not make sense to extract illustration (icon) metadata here. Instead, if you implement use of the illustration
|
||||
// metadata as icons for the loaded ZIM [kiwix-js #886], you should simply use the ZIMArchive.getMetadata() function when needed
|
||||
setTimeout(function () {
|
||||
Promise.all([
|
||||
that.addMetadataToZIMFile('Counter'),
|
||||
that.addMetadataToZIMFile('Date'),
|
||||
that.addMetadataToZIMFile('Description'),
|
||||
that.addMetadataToZIMFile('Name'),
|
||||
that.addMetadataToZIMFile('Publisher'),
|
||||
that.addMetadataToZIMFile('Title')
|
||||
]).then(function () {
|
||||
console.debug('ZIMArchive metadata loaded:', that);
|
||||
});
|
||||
}, 1500);
|
||||
}).catch(function (err) {
|
||||
console.warn('Error setting archive listings: ', err);
|
||||
});
|
||||
@ -208,27 +225,27 @@ ZIMArchive.prototype._searchArchiveParts = function (storage, prefixPath) {
|
||||
* @returns {Boolean}
|
||||
*/
|
||||
ZIMArchive.prototype.isReady = function () {
|
||||
return this._file !== null;
|
||||
return this.file !== null;
|
||||
};
|
||||
|
||||
/**
|
||||
* Detects whether the supplied archive is a Zimit-style archive or an OpenZIM archive and
|
||||
* sets a _file.zimType property accordingly; also returns the detected type. Extends ZIMFile.
|
||||
* sets a zimType property accordingly; also returns the detected type. Extends ZIMArchive.
|
||||
* @returns {String} Either 'zimit' for a Zimit archive, or 'open' for an OpenZIM archive
|
||||
*/
|
||||
ZIMArchive.prototype.setZimType = function () {
|
||||
var fileType = null;
|
||||
var archiveType = null;
|
||||
if (this.isReady()) {
|
||||
fileType = 'open';
|
||||
this._file.mimeTypes.forEach(function (v) {
|
||||
if (/warc-headers/i.test(v)) fileType = 'zimit';
|
||||
archiveType = 'open';
|
||||
this.file.mimeTypes.forEach(function (v) {
|
||||
if (/warc-headers/i.test(v)) archiveType = 'zimit';
|
||||
});
|
||||
this._file.zimType = fileType;
|
||||
console.debug('Archive type set to: ' + fileType);
|
||||
this.zimType = archiveType;
|
||||
console.debug('Archive type set to: ' + archiveType);
|
||||
} else {
|
||||
console.error('ZIMArchive is not ready! Cannot set ZIM type.');
|
||||
}
|
||||
return fileType;
|
||||
return archiveType;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -238,11 +255,11 @@ ZIMArchive.prototype.setZimType = function () {
|
||||
*/
|
||||
ZIMArchive.prototype.getMainPageDirEntry = function (callback) {
|
||||
if (this.isReady()) {
|
||||
var mainPageUrlIndex = this._file.mainPage;
|
||||
var mainPageUrlIndex = this.file.mainPage;
|
||||
var that = this;
|
||||
this._file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) {
|
||||
this.file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) {
|
||||
// Filter out Zimit files that we cannot handle without error
|
||||
if (that._file.zimType === 'zimit') dirEntry = transformZimit.filterReplayFiles(dirEntry);
|
||||
if (that.zimType === 'zimit') dirEntry = transformZimit.filterReplayFiles(dirEntry);
|
||||
callback(dirEntry);
|
||||
});
|
||||
}
|
||||
@ -254,7 +271,7 @@ ZIMArchive.prototype.getMainPageDirEntry = function (callback) {
|
||||
* @returns {DirEntry}
|
||||
*/
|
||||
ZIMArchive.prototype.parseDirEntryId = function (dirEntryId) {
|
||||
return zimDirEntry.DirEntry.fromStringId(this._file, dirEntryId);
|
||||
return zimDirEntry.DirEntry.fromStringId(this.file, dirEntryId);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -318,7 +335,7 @@ ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noIn
|
||||
var prefixNameSpaces = '';
|
||||
if (search.searchUrlIndex) {
|
||||
var rgxSplitPrefix = /^[-ABCHIJMUVWX]\//;
|
||||
if (that._file.zimType === 'zimit' && cns === 'C') {
|
||||
if (that.zimType === 'zimit' && cns === 'C') {
|
||||
// We have to account for the Zimit prefix in Type 1 ZIMs
|
||||
rgxSplitPrefix = /^(?:[CMWX]\/)?(?:[AH]\/)?/;
|
||||
}
|
||||
@ -425,7 +442,7 @@ ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noIn
|
||||
ZIMArchive.prototype.getContentNamespace = function () {
|
||||
var errorText;
|
||||
if (this.isReady()) {
|
||||
var ver = this._file.minorVersion;
|
||||
var ver = this.file.minorVersion;
|
||||
// DEV: There are currently only two defined values for minorVersion in the OpenZIM specification
|
||||
// If this changes, adapt the error checking and return values
|
||||
if (ver > 1) {
|
||||
@ -455,11 +472,11 @@ ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function (prefix, s
|
||||
prefix = prefix || '';
|
||||
var cns = this.getContentNamespace();
|
||||
// Search v1 article listing if available, otherwise fallback to v0
|
||||
var articleCount = this._file.articleCount || this._file.entryCount;
|
||||
var searchFunction = appstate.selectedArchive._file.dirEntryByTitleIndex;
|
||||
var articleCount = this.file.articleCount || this.file.entryCount;
|
||||
var searchFunction = appstate.selectedArchive.file.dirEntryByTitleIndex;
|
||||
if (search.searchUrlIndex) {
|
||||
articleCount = this._file.entryCount;
|
||||
searchFunction = appstate.selectedArchive._file.dirEntryByUrlIndex;
|
||||
articleCount = this.file.entryCount;
|
||||
searchFunction = appstate.selectedArchive.file.dirEntryByUrlIndex;
|
||||
}
|
||||
util.binarySearch(startIndex, articleCount, function(i) {
|
||||
return searchFunction(i).then(function(dirEntry) {
|
||||
@ -543,7 +560,7 @@ ZIMArchive.prototype.findDirEntriesFromFullTextSearch = function (search, dirEnt
|
||||
// We give ourselves an overhead in calculating the results needed, because full-text search will return some results already found
|
||||
// var resultsNeeded = Math.floor(params.maxSearchResultsSize - dirEntries.length / 2);
|
||||
var resultsNeeded = number || params.maxSearchResultsSize;
|
||||
return this.callLibzimWorker({action: "search", text: search.prefix, numResults: resultsNeeded}).then(function (results) {
|
||||
return this.callLibzimWorker({ action: 'search', text: search.prefix, numResults: resultsNeeded }).then(function (results) {
|
||||
if (results) {
|
||||
var dirEntryPaths = [];
|
||||
var fullTextPaths = [];
|
||||
@ -616,8 +633,8 @@ ZIMArchive.prototype.callLibzimWorker = function (parameters) {
|
||||
*/
|
||||
ZIMArchive.prototype.resolveRedirect = function (dirEntry, callback) {
|
||||
var that = this;
|
||||
this._file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) {
|
||||
if (that._file.zimType === 'zimit') resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry);
|
||||
this.file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) {
|
||||
if (that.zimType === 'zimit') resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry);
|
||||
callback(resolvedDirEntry);
|
||||
});
|
||||
};
|
||||
@ -729,14 +746,14 @@ ZIMArchive.prototype.getDirEntryByPath = function(path, zimitResolving, original
|
||||
var revisedPath = path.replace(/.*?((?:C\/A|A)\/(?!.*(?:C\/A|A)).+)$/, '$1');
|
||||
if (revisedPath !== path) {
|
||||
console.warn('*** Revised path from ' + path + '\nto: ' + revisedPath + ' ***');
|
||||
if (appstate.selectedArchive._file.zimType === 'zimit') {
|
||||
if (appstate.selectedArchive.zimType === 'zimit') {
|
||||
console.debug('*** DEV: Consider correcting this error in tranformZimit.js ***');
|
||||
}
|
||||
path = revisedPath;
|
||||
}
|
||||
}
|
||||
return util.binarySearch(0, this._file.entryCount, function(i) {
|
||||
return that._file.dirEntryByUrlIndex(i).then(function(dirEntry) {
|
||||
return util.binarySearch(0, this.file.entryCount, function(i) {
|
||||
return that.file.dirEntryByUrlIndex(i).then(function(dirEntry) {
|
||||
var url = dirEntry.namespace + "/" + dirEntry.url;
|
||||
if (path < url) {
|
||||
return -1;
|
||||
@ -748,15 +765,15 @@ ZIMArchive.prototype.getDirEntryByPath = function(path, zimitResolving, original
|
||||
});
|
||||
}).then(function (index) {
|
||||
if (index === null) return null;
|
||||
return that._file.dirEntryByUrlIndex(index);
|
||||
return that.file.dirEntryByUrlIndex(index);
|
||||
}).then(function (dirEntry) {
|
||||
// Filter Zimit dirEntries and do some initial transforms
|
||||
if (that._file.zimType === 'zimit') {
|
||||
if (that.zimType === 'zimit') {
|
||||
dirEntry = transformZimit.filterReplayFiles(dirEntry);
|
||||
}
|
||||
if (!dirEntry) {
|
||||
// We couldn't get the dirEntry, so look it up in the Zimit header
|
||||
if (!zimitResolving && that._file.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) {
|
||||
if (!zimitResolving && that.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) {
|
||||
// We need to look the file up in the Header namespace (double replacement ensures both types of ZIM are supported)
|
||||
var oldPath = path;
|
||||
path = path.replace(/^A\//, 'H/').replace(/^(C\/)A\//, '$1H/');
|
||||
@ -831,9 +848,9 @@ function fuzzySearch(path, search) {
|
||||
*/
|
||||
ZIMArchive.prototype.getRandomDirEntry = function (callback) {
|
||||
// Prefer an article-only (v1) title pointer list, if available
|
||||
var articleCount = this._file.articleCount || this._file.entryCount;
|
||||
var articleCount = this.file.articleCount || this.file.entryCount;
|
||||
var index = Math.floor(Math.random() * articleCount);
|
||||
this._file.dirEntryByTitleIndex(index).then(callback);
|
||||
this.file.dirEntryByTitleIndex(index).then(callback);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -869,7 +886,7 @@ ZIMArchive.prototype.addMetadataToZIMFile = function (key) {
|
||||
return new Promise(function (resolve, reject) {
|
||||
that.getMetadata(key, function (data) {
|
||||
data = data || '';
|
||||
that._file[lcaseKey] = data;
|
||||
that[lcaseKey] = data;
|
||||
resolve(data);
|
||||
});
|
||||
});
|
||||
|
@ -2,24 +2,28 @@
|
||||
* zimfile.js: Low-level ZIM file reader.
|
||||
*
|
||||
* Copyright 2015 Mossroy and contributors
|
||||
* License GPL v3:
|
||||
* Licence GPL v3:
|
||||
*
|
||||
* This file is part of Kiwix.
|
||||
*
|
||||
* Kiwix is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* it under the terms of the GNU General Public Licence as published by
|
||||
* the Free Software Foundation, either version 3 of the Licence, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Kiwix is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
* GNU General Public Licence for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* You should have received a copy of the GNU General Public Licence
|
||||
* along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
|
||||
'use strict';
|
||||
|
||||
/* global params, appstate */
|
||||
|
||||
import xz from './xzdec_wrapper.js';
|
||||
import zstd from './zstddec_wrapper.js';
|
||||
import util from './util.js';
|
||||
@ -27,10 +31,6 @@ import utf8 from './utf8.js';
|
||||
import zimDirEntry from './zimDirEntry.js';
|
||||
import FileCache from './filecache.js';
|
||||
|
||||
/* global params, appstate */
|
||||
|
||||
'use strict';
|
||||
|
||||
/**
|
||||
* This code makes an assumption that no Directory Entry will be larger than MAX_SUPPORTED_DIRENTRY_SIZE bytes.
|
||||
* If a larger dirEntry is encountered, a warning will display in console. Increase this value if necessary.
|
||||
@ -112,7 +112,6 @@ var readInt = function (data, offset, size) {
|
||||
* @property {Integer} mimeListPos Position of the MIME type list (also header size)
|
||||
* @property {Integer} mainPage Main page or 0xffffffff if no main page
|
||||
* @property {Integer} layoutPage Layout page or 0xffffffff if no layout page
|
||||
* @property {String} zimType Extended property: currently either 'open' for OpenZIM file type, or 'zimit' for the warc2zim file type used by Zimit (set in zimArchive.js)
|
||||
* @property {Map} mimeTypes Extended property: the ZIM file's MIME type table rendered as a Map (calculated entry)
|
||||
*/
|
||||
|
||||
@ -230,7 +229,7 @@ ZIMFile.prototype.dirEntry = function (offset) {
|
||||
* @returns {Promise<DirEntry>} A Promise for the requested DirEntry
|
||||
*/
|
||||
ZIMFile.prototype.dirEntryByUrlIndex = function (index) {
|
||||
var that = appstate.selectedArchive._file;
|
||||
var that = appstate.selectedArchive.file;
|
||||
if (!that) return Promise.resolve(null);
|
||||
return that._readInteger(that.urlPtrPos + index * 8, 8).then(function (dirEntryPos) {
|
||||
return that.dirEntry(dirEntryPos);
|
||||
@ -243,7 +242,7 @@ ZIMFile.prototype.dirEntryByUrlIndex = function (index) {
|
||||
* @returns {Promise<DirEntry>} A Promise for the requested DirEntry
|
||||
*/
|
||||
ZIMFile.prototype.dirEntryByTitleIndex = function (index) {
|
||||
var that = appstate.selectedArchive._file;
|
||||
var that = appstate.selectedArchive.file;
|
||||
// Use v1 title pointerlist if available, or fall back to legacy v0 list
|
||||
var ptrList = that.articlePtrPos || that.titlePtrPos;
|
||||
return that._readInteger(ptrList + index * 4, 4).then(function (urlIndex) {
|
||||
@ -333,7 +332,6 @@ ZIMFile.prototype.setListings = function (listings) {
|
||||
// If we are in a legacy ZIM archive, we need to calculate the true article count (of entries in the A namespace)
|
||||
// This effectively emulates the v1 article pointerlist
|
||||
if (this.minorVersion === 0) {
|
||||
// console.debug('ZIM DirListing version: 0 (legacy)', this);
|
||||
// Initiate a binary search for the first or last article
|
||||
var getArticleIndexByOrdinal = function (ordinal) {
|
||||
return util.binarySearch(0, that.entryCount, function (i) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user