diff --git a/www/js/app.js b/www/js/app.js index 4bfc3ad2..f05264de 100644 --- a/www/js/app.js +++ b/www/js/app.js @@ -1031,7 +1031,7 @@ document.getElementById('btnConfigure').addEventListener('click', function () { setTab(); if (params.themeChanged) { params.themeChanged = false; - var archiveName = appstate.selectedArchive ? appstate.selectedArchive._file.name : null; + var archiveName = appstate.selectedArchive ? appstate.selectedArchive.file.name : null; if (archiveName && ~params.lastPageVisit.indexOf(archiveName)) { goToArticle(params.lastPageVisit.replace(/@kiwixKey@.+$/, '')); } @@ -1775,7 +1775,7 @@ document.getElementById('manipulateImagesCheck').addEventListener('click', funct } else if (window.nw) { uiUtil.systemAlert('Unfortunately there is currently no way to save an image to disk in the NWJS version of this app.
You can do this in the PWA version: please visit https://pwa.kiwix.org.'); } else if (params.contentInjectionMode === 'serviceworker' && appstate.selectedArchive && - !/wikipedia|wikivoyage|mdwiki|wiktionary/i.test(appstate.selectedArchive._file.name)) { + !/wikipedia|wikivoyage|mdwiki|wiktionary/i.test(appstate.selectedArchive.file.name)) { uiUtil.systemAlert('Please be aware that Image manipulation can interfere with non-Wikimedia ZIMs (particularly ZIMs that have active content). If you cannot access the articles in such a ZIM, please turn this setting off.'); } else if (/PWA/.test(params.appType) && params.contentInjectionMode === 'jquery') { uiUtil.systemAlert('Be aware that this option may interfere with active content if you switch to Service Worker mode.'); @@ -3381,7 +3381,7 @@ function setLocalArchiveFromArchiveList (archive) { readNodeDirectoryAndCreateNodeFileObjects(params.pickedFolder, archive) .then(function (fileset) { var selectedFiles = fileset[0]; - if (appstate.selectedArchive && appstate.selectedArchive._file._files[0].name === selectedFiles[0].name) { + if (appstate.selectedArchive && appstate.selectedArchive.file._files[0].name === selectedFiles[0].name) { document.getElementById('btnHome').click(); } else { setLocalArchiveFromFileList(selectedFiles); @@ -3947,18 +3947,18 @@ function archiveReadyCallback (archive) { // Ensure that the new ZIM output is initially sent to the iframe (e.g. 
if the last article was loaded in a window) // (this only affects jQuery mode) appstate.target = 'iframe'; - appstate.wikimediaZimLoaded = /wikipedia|wikivoyage|mdwiki|wiktionary/i.test(archive._file.name); + appstate.wikimediaZimLoaded = /wikipedia|wikivoyage|mdwiki|wiktionary/i.test(archive.file.name); appstate.pureMode = false; // These ZIM types have so much dynamic content that we have to allow all images - if (params.imageDisplay && (/gutenberg|phet/i.test(archive._file.name) || + if (params.imageDisplay && (/gutenberg|phet/i.test(archive.file.name) || // params.isLandingPage || - /kolibri/i.test(archive._file.creator) || + /kolibri/i.test(archive.creator) || params.zimType === 'zimit')) { params.imageDisplayMode = 'all'; if (params.zimType !== 'zimit') { // For some archive types (Gutenberg, PhET, Kolibri at least), we have to get out of the way and allow the Service Worker // to act as a transparent passthrough (this key will be read in the handleMessageChannelMessage function) - console.debug('*** Activating pureMode for ZIM: ' + archive._file.name + ' ***'); + console.debug('*** Activating pureMode for ZIM: ' + archive.file.name + ' ***'); appstate.pureMode = true; } } @@ -3992,8 +3992,8 @@ function archiveReadyCallback (archive) { } } // The archive is set : go back to home page to start searching - params.storedFile = archive._file._files[0].name; - params.storedFilePath = archive._file._files[0].path ? archive._file._files[0].path : ''; + params.storedFile = archive.file._files[0].name; + params.storedFilePath = archive.file._files[0].path ? 
archive.file._files[0].path : ''; settingsStore.setItem('lastSelectedArchive', params.storedFile, Infinity); settingsStore.setItem('lastSelectedArchivePath', params.storedFilePath, Infinity); if (!~params.lastPageVisit.indexOf(params.storedFile.replace(/\.zim(\w\w)?$/, ''))) { @@ -4648,7 +4648,7 @@ function readArticle (dirEntry) { uiUtil.clearSpinner(); }); } else if (params.contentInjectionMode === 'serviceworker') { - articleContainer = window.open('../' + appstate.selectedArchive._file.name + '/' + dirEntry.namespace + '/' + encodeURIComponent(dirEntry.url), + articleContainer = window.open('../' + appstate.selectedArchive.file.name + '/' + dirEntry.namespace + '/' + encodeURIComponent(dirEntry.url), params.windowOpener === 'tab' ? '_blank' : encodeURIComponent(dirEntry.title | mimeType), params.windowOpener === 'window' ? 'toolbar=0,location=0,menubar=0,width=800,height=600,resizable=1,scrollbars=1' : null); appstate.target = 'window'; @@ -4660,7 +4660,7 @@ function readArticle (dirEntry) { } // Load cached start page if it exists and we have loaded the packaged file var htmlContent = 0; - var zimName = appstate.selectedArchive._file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, ''); + var zimName = appstate.selectedArchive.file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, ''); if (params.isLandingPage && params.cachedStartPages[zimName]) { htmlContent = -1; // @TODO: Why are we double-encoding here????? Clearly we double-decode somewhere... 
@@ -4777,7 +4777,7 @@ var loaded = false; var articleLoadedSW = function (dirEntry) { if (loaded) return; loaded = true; - params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive._file.name; + params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive.file.name; articleDocument = articleWindow.document.documentElement; var doc = articleWindow.document; var docBody = doc.body; @@ -4812,7 +4812,7 @@ var articleLoadedSW = function (dirEntry) { listenForNavigationKeys(); // We need to keep tabs on the opened tabs or windows if the user wants right-click functionality, and also parse download links // We need to set a timeout so that dynamically generated URLs are parsed as well (e.g. in Gutenberg ZIMs) - if (params.windowOpener) { + if (params.windowOpener && !appstate.pureMode) { setTimeout(function () { parseAnchorsJQuery(dirEntry); }, 1500); @@ -4973,7 +4973,7 @@ function handleMessageChannelMessage (event) { } else { loadingArticle = ''; } - var cacheKey = appstate.selectedArchive._file.name + '/' + title; + var cacheKey = appstate.selectedArchive.file.name + '/' + title; cache.getItemFromCacheOrZIM(appstate.selectedArchive, cacheKey, dirEntry).then(function (content) { console.debug('SW read binary file for: ' + dirEntry.namespace + '/' + dirEntry.url); if (params.zimType === 'zimit' && loadingArticle) { @@ -5051,7 +5051,7 @@ function postTransformedHTML (thisMessage, thisMessagePort, thisDirEntry) { if (/UWP/.test(params.appType) && (appstate.target === 'window' || appstate.messageChannelWaiting) && params.imageDisplay) { thisMessage.imageDisplay = 'all'; } // We need to do the same for Gutenberg and PHET ZIMs - if (params.imageDisplay && (/gutenberg|phet/i.test(appstate.selectedArchive._file.name) + if (params.imageDisplay && (/gutenberg|phet/i.test(appstate.selectedArchive.file.name) // || params.isLandingPage )) { thisMessage.imageDisplay = 'all'; @@ -5203,21 
+5203,21 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) { // Since page has been successfully loaded, store it in the browser history if (params.contentInjectionMode === 'jquery') pushBrowserHistoryState(dirEntry.namespace + '/' + dirEntry.url); // Store for fast retrieval - params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive._file.name; + params.lastPageVisit = dirEntry.namespace + '/' + dirEntry.url + '@kiwixKey@' + appstate.selectedArchive.file.name; if (params.rememberLastPage) settingsStore.setItem('lastPageVisit', params.lastPageVisit, Infinity); - cache.setArticle(appstate.selectedArchive._file.name, dirEntry.namespace + '/' + dirEntry.url, htmlArticle, function () {}); + cache.setArticle(appstate.selectedArchive.file.name, dirEntry.namespace + '/' + dirEntry.url, htmlArticle, function () {}); params.htmlArticle = htmlArticle; // Replaces ZIM-style URLs of img, script, link and media tags with a data-kiwixurl to prevent 404 errors [kiwix-js #272 #376] // This replacement also processes the URL relative to the page's ZIM URL so that we can find the ZIM URL of the asset // with the correct namespace (this works for old-style -,I,J namespaces and for new-style C namespace) - if (params.linkToWikimediaImageFile && !params.isLandingPage && /(?:wikipedia|wikivoyage|wiktionary|mdwiki)_/i.test(appstate.selectedArchive._file.name)) { - var wikiLang = appstate.selectedArchive._file.name.replace(/(?:wikipedia|wikivoyage|wiktionary|mdwiki)_([^_]+).+/i, '$1'); - var wikimediaZimFlavour = appstate.selectedArchive._file.name.replace(/_.+/, ''); + if (params.linkToWikimediaImageFile && !params.isLandingPage && /(?:wikipedia|wikivoyage|wiktionary|mdwiki)_/i.test(appstate.selectedArchive.file.name)) { + var wikiLang = appstate.selectedArchive.file.name.replace(/(?:wikipedia|wikivoyage|wiktionary|mdwiki)_([^_]+).+/i, '$1'); + var wikimediaZimFlavour = appstate.selectedArchive.file.name.replace(/_.+/, 
''); } var newBlock; var assetZIMUrlEnc; - var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/'; + var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/'; if (params.contentInjectionMode == 'jquery') { htmlArticle = htmlArticle.replace(params.regexpTagsWithZimUrl, function (match, blockStart, equals, quote, relAssetUrl, blockClose) { // Don't process data URIs (yet) @@ -5284,7 +5284,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) { // @TODO - remove when fixed on mw-offliner: dirty patch for removing extraneous tags in ids htmlArticle = htmlArticle.replace(/(\bid\s*=\s*"[^\s}]+)\s*\}[^"]*/g, '$1'); // @TODO - remove when fixed in MDwiki ZIM: dirty patch for removing erroneously hard-coded style - if (/^mdwiki/.test(appstate.selectedArchive._file.name)) htmlArticle = htmlArticle.replace(/(class=['"]thumbinner[^>]+style=['"]width\s*:\s*)\d+px/ig, '$1320px'); + if (/^mdwiki/.test(appstate.selectedArchive.file.name)) htmlArticle = htmlArticle.replace(/(class=['"]thumbinner[^>]+style=['"]width\s*:\s*)\d+px/ig, '$1320px'); // Remove landing page scripts that don't work in SW mode htmlArticle = htmlArticle.replace(/]+-\/[^>]*((?:images_loaded|masonry)\.min|article_list_home)\.js"[^<]*<\/script>/gi, ''); // Set max-width for infoboxes (now set in -/s/styles.css) @@ -5399,7 +5399,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) { ? false : params.useMathJax; // Detect raw MathML on page for certain ZIMs that are expected to have it params.containsMathTexRaw = params.useMathJax && - /stackexchange|askubuntu|superuser|stackoverflow|mathoverflow|serverfault|stackapps|proofwiki/i.test(appstate.selectedArchive._file.name) + /stackexchange|askubuntu|superuser|stackoverflow|mathoverflow|serverfault|stackapps|proofwiki/i.test(appstate.selectedArchive.file.name) ? 
/[^\\](\$\$?)((?:\\\$|(?!\1)[\s\S])+)\1/.test(htmlArticle) : false; // if (params.containsMathTexRaw) { @@ -5572,7 +5572,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) { blobArray.push([title, cssBlobCache.get(title)]); injectCSS(); } else { - var cacheKey = appstate.selectedArchive._file.name + '/' + title; + var cacheKey = appstate.selectedArchive.file.name + '/' + title; cache.getItemFromCacheOrZIM(appstate.selectedArchive, cacheKey).then(function (content) { // DEV: Uncomment line below and break on next to capture cssContent for local filesystem cache // var cssContent = util.uintToString(content); @@ -5884,7 +5884,7 @@ function displayArticleContentInContainer (dirEntry, htmlArticle) { // If the request was not initiated by an existing controlled window, we instantiate the request here if (!appstate.messageChannelWaiting) { // We put the ZIM filename as a prefix in the URL, so that browser caches are separate for each ZIM file - var newLocation = '../' + appstate.selectedArchive._file.name + '/' + dirEntry.namespace + '/' + encodedUrl; + var newLocation = '../' + appstate.selectedArchive.file.name + '/' + dirEntry.namespace + '/' + encodedUrl; if (navigator.serviceWorker.controller) { loaded = false; articleWindow.location.href = newLocation; @@ -6053,7 +6053,7 @@ function addListenersToLink (a, href, baseUrl) { e.stopPropagation(); anchorParameter = href.match(/#([^#;]+)$/); anchorParameter = anchorParameter ? 
anchorParameter[1] : ''; - var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/'; + var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/'; var zimRoot = indexRoot.replace(/^.+?\/www\//, '/'); var zimUrl; var zimUrlFullEncoding; @@ -6398,7 +6398,7 @@ function goToArticle (path, download, contentType, pathEnc) { clearFindInArticle(); var shortTitle = path.replace(/[^/]+\//g, '').substring(0, 18); uiUtil.pollSpinner('Loading ' + shortTitle); - var zimName = appstate.selectedArchive._file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, ''); + var zimName = appstate.selectedArchive.file.name.replace(/\.[^.]+$/, '').replace(/_\d+-\d+$/, ''); if (~path.indexOf(params.cachedStartPages[zimName])) { goToMainArticle(); return; @@ -6426,7 +6426,7 @@ function goToArticle (path, download, contentType, pathEnc) { } else if (download || /\/(epub|pdf|zip|.*opendocument|.*officedocument|tiff|mp4|webm|mpeg|octet-stream)\b/i.test(mimetype)) { // PDFs can be treated as a special case, as they can be displayed directly in a browser window or tab in most browsers (but not UWP) if (!/UWP/.test(params.appType) && params.contentInjectionMode === 'serviceworker' && (/\/pdf\b/.test(mimetype) || /\.pdf([?#]|$)/i.test(dirEntry.url))) { - window.open(document.location.pathname.replace(/[^/]+$/, '') + appstate.selectedArchive._file.name + '/' + pathForServiceWorker, + window.open(document.location.pathname.replace(/[^/]+$/, '') + appstate.selectedArchive.file.name + '/' + pathForServiceWorker, params.windowOpener === 'tab' ? '_blank' : 'Download PDF', params.windowOpener === 'window' ? 
'toolbar=0,location=0,menubar=0,width=800,height=600,resizable=1,scrollbars=1' : null); } else { @@ -6462,7 +6462,7 @@ function goToRandomArticle () { // We fall back to the old A namespace to support old ZIM files without a text/html MIME type for articles // DEV: If minorVersion is 1, then we are using a v1 article-only title listing. By definition, // all dirEntries in an article-only listing must be articles. - if (appstate.selectedArchive._file.minorVersion === 1 || /text\/html\b/i.test(dirEntry.getMimetype()) || + if (appstate.selectedArchive.file.minorVersion === 1 || /text\/html\b/i.test(dirEntry.getMimetype()) || params.zimType !== 'zimit' && dirEntry.namespace === 'A') { params.isLandingPage = false; alertBoxHeader.style.display = 'none'; diff --git a/www/js/lib/images.js b/www/js/lib/images.js index f25a4ba7..59462824 100644 --- a/www/js/lib/images.js +++ b/www/js/lib/images.js @@ -105,7 +105,7 @@ function extractImages (images, callback) { return; } // Zimit files (at least) will sometimes have a ZIM prefix, but we are extracting raw here - title = title.replace(appstate.selectedArchive._file.name + '/', ''); + title = title.replace(appstate.selectedArchive.file.name + '/', ''); // Zimit files store URLs encoded! 
if (params.zimType === 'zimit') title = encodeURI(title); appstate.selectedArchive.getDirEntryByPath(title).then(function (dirEntry) { @@ -242,7 +242,7 @@ function prepareImagesServiceWorker (win, forPrinting) { }, 1000); if (!forPrinting && !documentImages.length) return; var imageHtml; - var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/'; + var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/'; for (var i = 0, l = documentImages.length; i < l; i++) { // Process Wikimedia MathML, but not if we'll be using the jQuery routine later if (!(params.manipulateImages || params.allowHTMLExtraction)) { @@ -312,7 +312,7 @@ function prepareImagesJQuery (win, forPrinting) { container = win; var doc = container.document; var documentImages = doc.querySelectorAll('img[data-kiwixurl], video, audio'); - var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name) + '/'; + var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/'; indexRoot = indexRoot.replace(/^\//, ''); // Zimit ZIMs work better if all images are extracted if (params.zimType === 'zimit') forPrinting = true; @@ -331,7 +331,7 @@ function prepareImagesJQuery (win, forPrinting) { image.style.opacity = '0'; // Set a minimum width to avoid some images not rendering in squashed hidden tables if (params.displayHiddenBlockElements && image.width && !image.style.minWidth && - /wiki|wiktionary/i.test(appstate.selectedArchive._file.name)) { + /wiki|wiktionary/i.test(appstate.selectedArchive.file.name)) { var imgX = image.width + ''; imgX = imgX.replace(/(\d+)$/, '$1px'); image.style.minWidth = imgX; diff --git a/www/js/lib/transformZimit.js b/www/js/lib/transformZimit.js index 16b0a0aa..0bd33fb8 100644 --- a/www/js/lib/transformZimit.js +++ b/www/js/lib/transformZimit.js @@ -131,7 +131,7 
@@ function transformReplayUrls (dirEntry, data, mimetype, callback) { * Note that some Zimit ZIMs have mimeteypes like 'text/html;raw=true', so we can't simply match 'text/html' * Other ZIMs have a mimetype like 'html' (with no 'text/'), so we have to match as generically as possible */ - var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name); + var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name); if (/\bx?html\b/i.test(mimetype)) { var zimitPrefix = data.match(regexpGetZimitPrefix); // If the URL is the same as the URL with everything after the first / removed, then we are in the root directory @@ -320,7 +320,7 @@ function transformVideoUrl (url, articleDocument, callback) { console.debug('TRANSFORMED VIDEO URL ' + pureUrl + ' --> \n' + transUrl); // If we are dealing with embedded video, we have to find the embedded URL and subsitute it if (/\/embed\//i.test(pureUrl)) { - var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive._file.name); + var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name); Array.prototype.slice.call(articleDocument.querySelectorAll('iframe')).forEach(function (frame) { if (~frame.src.indexOf(videoId)) { var newUrl = window.location.origin + indexRoot + transUrl.replace(/videoembed/, ''); diff --git a/www/js/lib/uiUtil.js b/www/js/lib/uiUtil.js index 5d84a3d9..be8c61ea 100644 --- a/www/js/lib/uiUtil.js +++ b/www/js/lib/uiUtil.js @@ -399,7 +399,7 @@ function displayActiveContentWarning (type) { (params.contentInjectionMode === 'jquery' ? 'Limited Zimit support! Please switch to Service Worker mode if your platform supports it. ' : 'Support for Zimit archives is experimental. Some content (e.g. audio/video) may fail. ') + - 'You can search for content above' + (appstate.selectedArchive._file.fullTextIndex ? 
' using full-text search if your app supports it, ' + + 'You can search for content above' + (appstate.selectedArchive.file.fullTextIndex ? ' using full-text search if your app supports it, ' + 'or s' : '. S') + 'tart your search with .* to match part of a title. Type a space for the ZIM Archive Index, or ' + 'space / for the URL Index. [Permanently hide]' + ''; diff --git a/www/js/lib/zimArchive.js b/www/js/lib/zimArchive.js index 1f0e26f3..099824a3 100644 --- a/www/js/lib/zimArchive.js +++ b/www/js/lib/zimArchive.js @@ -1,22 +1,22 @@ /** * zimArchive.js: Support for archives in ZIM format. * - * Copyright 2015 Mossroy and contributors - * License GPL v3: + * Copyright 2015-2023 Mossroy, Jaifroid and contributors + * Licence GPL v3: * * This file is part of Kiwix. * * Kiwix is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. * * Kiwix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * GNU General Public Licence for more details. * - * You should have received a copy of the GNU General Public License + * You should have received a copy of the GNU General Public Licence * along with Kiwix (file LICENSE-GPLv3.txt). 
If not, see */ @@ -34,10 +34,17 @@ import utf8 from './utf8.js'; /** * ZIM Archive * - * * @typedef ZIMArchive - * @property {ZIMFile} _file The ZIM file (instance of ZIMFile, that might physically be split into several actual files) - * @property {String} _language Language of the content + * @property {ZIMFile} file The ZIM file (instance of ZIMFile, that might physically be split into several actual _files) + * @property {String} counter Counter of various types of content in the archive + * @property {String} creator Creator of the content + * @property {String} date Date of the creation of the archive + * @property {String} description Description of the content + * @property {String} language Language of the content + * @property {String} name Name of the archive + * @property {String} publisher Publisher of the content + * @property {String} title Title of the content + * @property {String} zimType Extended property: currently either 'open' for OpenZIM file type, or 'zimit' for the warc2zim file type used by Zimit */ /** @@ -66,17 +73,16 @@ var LZ; */ function ZIMArchive (storage, path, callbackReady, callbackError) { var that = this; - that._file = null; - that._language = ''; // @TODO + that.file = null; var createZimfile = function (fileArray) { zimfile.fromFileArray(fileArray).then(function (file) { - that._file = file; + that.file = file; // Clear the previous libzimWoker LZ = null; // Set a global parameter to report the search provider type params.searchProvider = 'title'; // File has been created, but we need to add any Listings which extend the archive metadata - that._file.setListings([ + that.file.setListings([ // Provide here any Listings for which we need to extract metadata as key:value obects to be added to the file // 'ptrName' and 'countName' contain the key names to be set in the archive file object { @@ -100,16 +106,16 @@ function ZIMArchive (storage, path, callbackReady, callbackError) { } ]).then(function () { // There is currently an 
exception thrown in the libzim wasm if we attempt to load a split ZIM archive, so we work around - var isSplitZim = /\.zima.$/i.test(that._file._files[0].name); + var isSplitZim = /\.zima.$/i.test(that.file._files[0].name); var libzimReaderType = params.debugLibzimASM || ('WebAssembly' in self ? 'wasm' : 'asm'); - if (that._file.fullTextIndex && params.debugLibzimASM !== 'disable' && (params.debugLibzimASM || !isSplitZim && + if (that.file.fullTextIndex && params.debugLibzimASM !== 'disable' && (params.debugLibzimASM || !isSplitZim && // The ASM implementation requires Atomics support, whereas the WASM implementation does not (typeof Atomics !== 'undefined' || libzimReaderType === 'wasm') && // Note that NWJS currently throws due to problems with Web Worker context, and Android is very slow unless we use OPFS - !(/Android/.test(params.appType) && !params.useOPFS) && !(window.nw && that._file._files[0].readMode === 'electron'))) { + !(/Android/.test(params.appType) && !params.useOPFS) && !(window.nw && that.file._files[0].readMode === 'electron'))) { console.log('Instantiating libzim ' + libzimReaderType + ' Web Worker...'); LZ = new Worker('js/lib/libzim-' + libzimReaderType + '.js'); - that.callLibzimWorker({ action: 'init', files: that._file._files }).then(function (msg) { + that.callLibzimWorker({ action: 'init', files: that.file._files }).then(function (msg) { // console.debug(msg); params.searchProvider = 'fulltext: ' + libzimReaderType; // Update the API panel @@ -120,7 +126,7 @@ function ZIMArchive (storage, path, callbackReady, callbackError) { }); } else { // var message = 'Full text searching is not available because '; - if (!that._file.fullTextIndex) { + if (!that.file.fullTextIndex) { params.searchProvider += ': no_fulltext'; // message += 'this ZIM does not have a full-text index.'; } else if (isSplitZim) { params.searchProvider += ': split_zim'; // message += 'the ZIM archive is split.'; @@ -134,24 +140,35 @@ function ZIMArchive (storage, path, 
callbackReady, callbackError) { params.searchProvider += ': unknown'; } uiUtil.reportSearchProviderToAPIStatusPanel(params.searchProvider); - // uiUtil.systemAlert(message); } // Set the archive file type ('open' or 'zimit') params.zimType = that.setZimType(); - // var thisCallbackReady = callbackReady; - // Add any metadata from the M/ namespace that you need access to here + // Add time-critical metadata from the M/ namespace that you need early access to here + // Note that adding metadata here delays the reporting of the ZIM archive as ready + // Further metadata are added in the background below, and can be accessed later Promise.all([ that.addMetadataToZIMFile('Creator'), - that.addMetadataToZIMFile('Name') + that.addMetadataToZIMFile('Language') ]).then(function () { - // If the arhchive name doesn't end in `.zim`, we add it to the metadata - that._file.name = that._file.name.replace(/\.zim\s*$/i, '') + '.zim'; + console.debug('ZIMArchive ready, metadata will be added in the background'); + // All listings should be loaded, so we can now call the callback callbackReady(that); }); - // DEV: Currently, extended listings are only used for title (=article) listings when the user searches - // for an article or uses the Random button, by which time the listings will have been extracted. - // If, in the future, listings are used in a more time-critical manner, consider forcing a wait before - // declaring the archive to be ready, by chaining the following callback in a .then() function of setListings. + // Add non-time-critical metadata to archive in background so as not to delay opening of the archive + // DEV: Note that it does not make sense to extract illustration (icon) metadata here. 
Instead, if you implement use of the illustration + // metadata as icons for the loaded ZIM [kiwix-js #886], you should simply use the ZIMArchive.getMetadata() function when needed + setTimeout(function () { + Promise.all([ + that.addMetadataToZIMFile('Counter'), + that.addMetadataToZIMFile('Date'), + that.addMetadataToZIMFile('Description'), + that.addMetadataToZIMFile('Name'), + that.addMetadataToZIMFile('Publisher'), + that.addMetadataToZIMFile('Title') + ]).then(function () { + console.debug('ZIMArchive metadata loaded:', that); + }); + }, 1500); }).catch(function (err) { console.warn('Error setting archive listings: ', err); }); @@ -208,27 +225,27 @@ ZIMArchive.prototype._searchArchiveParts = function (storage, prefixPath) { * @returns {Boolean} */ ZIMArchive.prototype.isReady = function () { - return this._file !== null; + return this.file !== null; }; /** * Detects whether the supplied archive is a Zimit-style archive or an OpenZIM archive and - * sets a _file.zimType property accordingly; also returns the detected type. Extends ZIMFile. + * sets a zimType property accordingly; also returns the detected type. Extends ZIMArchive. * @returns {String} Either 'zimit' for a Zimit archive, or 'open' for an OpenZIM archive */ ZIMArchive.prototype.setZimType = function () { - var fileType = null; + var archiveType = null; if (this.isReady()) { - fileType = 'open'; - this._file.mimeTypes.forEach(function (v) { - if (/warc-headers/i.test(v)) fileType = 'zimit'; + archiveType = 'open'; + this.file.mimeTypes.forEach(function (v) { + if (/warc-headers/i.test(v)) archiveType = 'zimit'; }); - this._file.zimType = fileType; - console.debug('Archive type set to: ' + fileType); + this.zimType = archiveType; + console.debug('Archive type set to: ' + archiveType); } else { console.error('ZIMArchive is not ready! 
Cannot set ZIM type.'); } - return fileType; + return archiveType; }; /** @@ -238,11 +255,11 @@ ZIMArchive.prototype.setZimType = function () { */ ZIMArchive.prototype.getMainPageDirEntry = function (callback) { if (this.isReady()) { - var mainPageUrlIndex = this._file.mainPage; + var mainPageUrlIndex = this.file.mainPage; var that = this; - this._file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) { + this.file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) { // Filter out Zimit files that we cannot handle without error - if (that._file.zimType === 'zimit') dirEntry = transformZimit.filterReplayFiles(dirEntry); + if (that.zimType === 'zimit') dirEntry = transformZimit.filterReplayFiles(dirEntry); callback(dirEntry); }); } @@ -254,7 +271,7 @@ ZIMArchive.prototype.getMainPageDirEntry = function (callback) { * @returns {DirEntry} */ ZIMArchive.prototype.parseDirEntryId = function (dirEntryId) { - return zimDirEntry.DirEntry.fromStringId(this._file, dirEntryId); + return zimDirEntry.DirEntry.fromStringId(this.file, dirEntryId); }; /** @@ -318,7 +335,7 @@ ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noIn var prefixNameSpaces = ''; if (search.searchUrlIndex) { var rgxSplitPrefix = /^[-ABCHIJMUVWX]\//; - if (that._file.zimType === 'zimit' && cns === 'C') { + if (that.zimType === 'zimit' && cns === 'C') { // We have to account for the Zimit prefix in Type 1 ZIMs rgxSplitPrefix = /^(?:[CMWX]\/)?(?:[AH]\/)?/; } @@ -425,7 +442,7 @@ ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noIn ZIMArchive.prototype.getContentNamespace = function () { var errorText; if (this.isReady()) { - var ver = this._file.minorVersion; + var ver = this.file.minorVersion; // DEV: There are currently only two defined values for minorVersion in the OpenZIM specification // If this changes, adapt the error checking and return values if (ver > 1) { @@ -455,11 +472,11 @@ 
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function (prefix, s prefix = prefix || ''; var cns = this.getContentNamespace(); // Search v1 article listing if available, otherwise fallback to v0 - var articleCount = this._file.articleCount || this._file.entryCount; - var searchFunction = appstate.selectedArchive._file.dirEntryByTitleIndex; + var articleCount = this.file.articleCount || this.file.entryCount; + var searchFunction = appstate.selectedArchive.file.dirEntryByTitleIndex; if (search.searchUrlIndex) { - articleCount = this._file.entryCount; - searchFunction = appstate.selectedArchive._file.dirEntryByUrlIndex; + articleCount = this.file.entryCount; + searchFunction = appstate.selectedArchive.file.dirEntryByUrlIndex; } util.binarySearch(startIndex, articleCount, function(i) { return searchFunction(i).then(function(dirEntry) { @@ -488,7 +505,7 @@ ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function (prefix, s }); }, true).then(function (firstIndex) { var vDirEntries = []; - var addDirEntries = function(index, lastTitle) { + var addDirEntries = function (index, lastTitle) { if (search.status === 'cancelled' || search.found >= search.size || index >= articleCount || lastTitle && !~lastTitle.indexOf(prefix) || index - firstIndex >= search.window) { // DEV: Diagnostics to be removed before merge @@ -543,7 +560,7 @@ ZIMArchive.prototype.findDirEntriesFromFullTextSearch = function (search, dirEnt // We give ourselves an overhead in caclulating the results needed, because full-text search will return some results already found // var resultsNeeded = Math.floor(params.maxSearchResultsSize - dirEntries.length / 2); var resultsNeeded = number || params.maxSearchResultsSize; - return this.callLibzimWorker({action: "search", text: search.prefix, numResults: resultsNeeded}).then(function (results) { + return this.callLibzimWorker({ action: 'search', text: search.prefix, numResults: resultsNeeded }).then(function (results) { if (results) { 
var dirEntryPaths = []; var fullTextPaths = []; @@ -614,10 +631,10 @@ ZIMArchive.prototype.callLibzimWorker = function (parameters) { * @param {DirEntry} dirEntry * @param {callbackDirEntry} callback */ -ZIMArchive.prototype.resolveRedirect = function(dirEntry, callback) { +ZIMArchive.prototype.resolveRedirect = function (dirEntry, callback) { var that = this; - this._file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) { - if (that._file.zimType === 'zimit') resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry); + this.file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) { + if (that.zimType === 'zimit') resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry); callback(resolvedDirEntry); }); }; @@ -632,7 +649,7 @@ ZIMArchive.prototype.resolveRedirect = function(dirEntry, callback) { * @param {DirEntry} dirEntry * @param {callbackStringContent} callback */ -ZIMArchive.prototype.readUtf8File = function(dirEntry, callback) { +ZIMArchive.prototype.readUtf8File = function (dirEntry, callback) { var cns = appstate.selectedArchive.getContentNamespace(); return dirEntry.readData().then(function(data) { var mimetype = dirEntry.getMimetype(); @@ -691,7 +708,7 @@ ZIMArchive.prototype.readUtf8File = function(dirEntry, callback) { * @param {DirEntry} dirEntry * @param {callbackBinaryContent} callback */ -ZIMArchive.prototype.readBinaryFile = function(dirEntry, callback) { +ZIMArchive.prototype.readBinaryFile = function (dirEntry, callback) { var that = this; return dirEntry.readData().then(function(data) { var mimetype = dirEntry.getMimetype(); @@ -720,7 +737,7 @@ ZIMArchive.prototype.readBinaryFile = function(dirEntry, callback) { * @param {String} originalPath Optional string used internally to prevent infinite loop * @return {Promise} A Promise that resolves to a Directory Entry, or null if not found. 
*/ -ZIMArchive.prototype.getDirEntryByPath = function(path, zimitResolving, originalPath) { +ZIMArchive.prototype.getDirEntryByPath = function (path, zimitResolving, originalPath) { var that = this; if (originalPath) appstate.originalPath = originalPath; path = path.replace(/\?kiwix-display/, ''); @@ -729,14 +746,14 @@ ZIMArchive.prototype.getDirEntryByPath = function(path, zimitResolving, original var revisedPath = path.replace(/.*?((?:C\/A|A)\/(?!.*(?:C\/A|A)).+)$/, '$1'); if (revisedPath !== path) { console.warn('*** Revised path from ' + path + '\nto: ' + revisedPath + ' ***'); - if (appstate.selectedArchive._file.zimType === 'zimit') { + if (appstate.selectedArchive.zimType === 'zimit') { console.debug('*** DEV: Consider correcting this error in tranformZimit.js ***'); } path = revisedPath; } } - return util.binarySearch(0, this._file.entryCount, function(i) { - return that._file.dirEntryByUrlIndex(i).then(function(dirEntry) { + return util.binarySearch(0, this.file.entryCount, function(i) { + return that.file.dirEntryByUrlIndex(i).then(function(dirEntry) { var url = dirEntry.namespace + "/" + dirEntry.url; if (path < url) { return -1; @@ -748,15 +765,15 @@ ZIMArchive.prototype.getDirEntryByPath = function(path, zimitResolving, original }); }).then(function (index) { if (index === null) return null; - return that._file.dirEntryByUrlIndex(index); + return that.file.dirEntryByUrlIndex(index); }).then(function (dirEntry) { // Filter Zimit dirEntries and do somee initial transforms - if (that._file.zimType === 'zimit') { + if (that.zimType === 'zimit') { dirEntry = transformZimit.filterReplayFiles(dirEntry); } if (!dirEntry) { // We couldn't get the dirEntry, so look it up the Zimit header - if (!zimitResolving && that._file.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) { + if (!zimitResolving && that.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) { // We need to look the file up in the 
Header namespace (double replacement ensures both types of ZIM are supported) var oldPath = path; path = path.replace(/^A\//, 'H/').replace(/^(C\/)A\//, '$1H/'); @@ -831,9 +848,9 @@ function fuzzySearch(path, search) { */ ZIMArchive.prototype.getRandomDirEntry = function (callback) { // Prefer an article-only (v1) title pointer list, if available - var articleCount = this._file.articleCount || this._file.entryCount; + var articleCount = this.file.articleCount || this.file.entryCount; var index = Math.floor(Math.random() * articleCount); - this._file.dirEntryByTitleIndex(index).then(callback); + this.file.dirEntryByTitleIndex(index).then(callback); }; /** @@ -869,7 +886,7 @@ ZIMArchive.prototype.addMetadataToZIMFile = function (key) { return new Promise(function (resolve, reject) { that.getMetadata(key, function (data) { data = data || ''; - that._file[lcaseKey] = data; + that[lcaseKey] = data; resolve(data); }); }); @@ -877,4 +894,4 @@ ZIMArchive.prototype.addMetadataToZIMFile = function (key) { export default { ZIMArchive: ZIMArchive -}; \ No newline at end of file +}; diff --git a/www/js/lib/zimfile.js b/www/js/lib/zimfile.js index ab67a53b..50f3739d 100644 --- a/www/js/lib/zimfile.js +++ b/www/js/lib/zimfile.js @@ -2,24 +2,28 @@ * zimfile.js: Low-level ZIM file reader. * * Copyright 2015 Mossroy and contributors - * License GPL v3: + * Licence GPL v3: * * This file is part of Kiwix. * * Kiwix is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. * * Kiwix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. + * GNU General Public Licence for more details. * - * You should have received a copy of the GNU General Public License + * You should have received a copy of the GNU General Public Licence * along with Kiwix (file LICENSE-GPLv3.txt). If not, see */ +'use strict'; + +/* global params, appstate */ + import xz from './xzdec_wrapper.js'; import zstd from './zstddec_wrapper.js'; import util from './util.js'; @@ -27,10 +31,6 @@ import utf8 from './utf8.js'; import zimDirEntry from './zimDirEntry.js'; import FileCache from './filecache.js'; -/* global params, appstate */ - -'use strict'; - /** * This code makes an assumption that no Directory Entry will be larger that MAX_SUPPORTED_DIRENTRY_SIZE bytes. * If a larger dirEntry is encountered, a warning will display in console. Increase this value if necessary. @@ -112,7 +112,6 @@ var readInt = function (data, offset, size) { * @property {Integer} mimeListPos Position of the MIME type list (also header size) * @property {Integer} mainPage Main page or 0xffffffff if no main page * @property {Integer} layoutPage Layout page or 0xffffffffff if no layout page - * @property {String} zimType Extended property: currently either 'open' for OpenZIM file type, or 'zimit' for the warc2zim file type used by Zimit (set in zimArchive.js) * @property {Map} mimeTypes Extended property: the ZIM file's MIME type table rendered as a Map (calculated entry) */ @@ -230,7 +229,7 @@ ZIMFile.prototype.dirEntry = function (offset) { * @returns {Promise} A Promise for the requested DirEntry */ ZIMFile.prototype.dirEntryByUrlIndex = function (index) { - var that = appstate.selectedArchive._file; + var that = appstate.selectedArchive.file; if (!that) return Promise.resolve(null); return that._readInteger(that.urlPtrPos + index * 8, 8).then(function (dirEntryPos) { return that.dirEntry(dirEntryPos); @@ -243,7 +242,7 @@ ZIMFile.prototype.dirEntryByUrlIndex = function (index) { * @returns {Promise} 
A Promise for the requested DirEntry */ ZIMFile.prototype.dirEntryByTitleIndex = function (index) { - var that = appstate.selectedArchive._file; + var that = appstate.selectedArchive.file; // Use v1 title pointerlist if available, or fall back to legacy v0 list var ptrList = that.articlePtrPos || that.titlePtrPos; return that._readInteger(ptrList + index * 4, 4).then(function (urlIndex) { @@ -333,7 +332,6 @@ ZIMFile.prototype.setListings = function (listings) { // If we are in a legacy ZIM archive, we need to calculate the true article count (of entries in the A namespace) // This effectively emulates the v1 article pointerlist if (this.minorVersion === 0) { - // console.debug('ZIM DirListing version: 0 (legacy)', this); // Initiate a binary search for the first or last article var getArticleIndexByOrdinal = function (ordinal) { return util.binarySearch(0, that.entryCount, function (i) {