kiwix-js-pwa/www/js/lib/zimArchive.js

/**
 * zimArchive.js: Support for archives in ZIM format.
 *
 * Copyright 2015 Mossroy and contributors
 * License GPL v3:
 *
 * This file is part of Kiwix.
 *
 * Kiwix is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Kiwix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Kiwix (file LICENSE-GPLv3.txt).  If not, see <http://www.gnu.org/licenses/>
 */

'use strict';

/* global params */

import zimfile from './zimfile.js';
import zimDirEntry from './zimDirEntry.js';
import transformZimit from './transformZimit.js';
import util from './util.js';
import uiUtil from './uiUtil.js';
import utf8 from './utf8.js';

/**
 * ZIM Archive
 *
 *
 * @typedef ZIMArchive
 * @property {ZIMFile} _file The ZIM file (instance of ZIMFile, that might physically be split into several actual files)
 * @property {String} _language Language of the content
 */

/**
 * @callback callbackZIMArchive
 * @param {ZIMArchive} zimArchive Ready-to-use ZIMArchive
 */

/**
 * @callback callbackMetadata
 * @param {String} data metadata string
 */

/**
 * @param {Worker} LZ A Web Worker to run the libzim Web Assembly binary
 */
var LZ;

/**
 * Creates a ZIM archive object to access the ZIM file at the given path in the given storage.
 * This constructor can also be used with a single File parameter.
 *
 * @param {StorageFirefoxOS|Array<Blob>} storage Storage (in this case, the path must be given) or Array of Files (path parameter must be omitted)
 * @param {String} path The Storage path for an OS that requires this to be specified
 * @param {callbackZIMArchive} callbackReady The function to call when the archive is ready to use
 * @param {callbackZIMArchive} callbackError The function to call when an error occurs
 */
function ZIMArchive (storage, path, callbackReady, callbackError) {
    var that = this;
    that._file = null;
    that._language = ''; // @TODO
    var createZimfile = function (fileArray) {
        zimfile.fromFileArray(fileArray).then(function (file) {
            that._file = file;
            // Clear the previous libzimWoker
            LZ = null;
            // Set a global parameter to report the search provider type
            params.searchProvider = 'title';
            // File has been created, but we need to add any Listings which extend the archive metadata
            that._file.setListings([
                // Provide here any Listings for which we need to extract metadata as key:value obects to be added to the file
                // 'ptrName' and 'countName' contain the key names to be set in the archive file object
                {
                    // This defines the standard v0 (legacy) title index that contains listings for every entry in the ZIM (not just articles)
                    // It represents the same index that is referenced in the ZIM archive header
                    path: 'X/listing/titleOrdered/v0',
                    ptrName: 'titlePtrPos',
                    countName: 'entryCount'
                },
                {
                    // This defines a new version 1 index that is present in no-namespace ZIMs, and contains a title-ordered list of articles
                    path: 'X/listing/titleOrdered/v1',
                    ptrName: 'articlePtrPos',
                    countName: 'articleCount'
                },
                {
                    // This tests for and specifies the existence of any Xapian Full Text Index
                    path: 'X/fulltext/xapian',
                    ptrName: 'fullTextIndex',
                    countName: 'fullTextIndexSize'
                }
            ]).then(function () {
                // There is currently an exception thrown in the libzim wasm if we attempt to load a split ZIM archive, so we work around
                var isSplitZim = /\.zima.$/i.test(that._file._files[0].name);
                var libzimReaderType = params.debugLibzimASM || ('WebAssembly' in self ? 'wasm' : 'asm');
                if (that._file.fullTextIndex && params.debugLibzimASM !== 'disable' && (params.debugLibzimASM || !isSplitZim &&
                // The ASM implementation requires Atomics support, whereas the WASM implementation does not
                (typeof Atomics !== 'undefined' || libzimReaderType === 'wasm') &&
                // Note that Android and NWJS currently throw due to problems with Web Worker context
                !/Android/.test(params.appType) && !(window.nw && that._file._files[0].readMode === 'electron'))) {
                    console.log('Instantiating libzim ' + libzimReaderType + ' Web Worker...');
                    LZ = new Worker('js/lib/libzim-' + libzimReaderType + '.js');
                    that.callLibzimWorker({ action: 'init', files: that._file._files }).then(function (msg) {
                        // console.debug(msg);
                        params.searchProvider = 'fulltext: ' + libzimReaderType;
                        // Update the API panel
                        uiUtil.reportSearchProviderToAPIStatusPanel(params.searchProvider);
                    }).catch(function (err) {
                        uiUtil.reportSearchProviderToAPIStatusPanel(params.searchProvider + ': ERROR');
                        console.error('The libzim worker could not be instantiated!', err);
                    });
                } else {
                    // var message = 'Full text searching is not available because ';
                    if (!that._file.fullTextIndex) {
                        params.searchProvider += ': no_fulltext'; // message += 'this ZIM does not have a full-text index.';
                    } else if (isSplitZim) {
                        params.searchProvider += ': split_zim'; // message += 'the ZIM archive is split.';
                    } else if (typeof Atomics === 'undefined') {
                        params.searchProvider += ': no_atomics'; // message += 'this browser does not support Atomic operations.';
                    } else if (/Android/.test(params.appType)) {
                        params.searchProvider += ': no_sharedArrayBuffer';
                    } else if (params.debugLibzimASM === 'disable') {
                        params.searchProvider += ': disabled';
                    } else {
                        params.searchProvider += ': unknown';
                    }
                    uiUtil.reportSearchProviderToAPIStatusPanel(params.searchProvider);
                    // uiUtil.systemAlert(message);
                }
            });
            // Set the archive file type ('open' or 'zimit')
            params.zimType = that.setZimType();
            // DEV: Currently, extended listings are only used for title (=article) listings when the user searches
            // for an article or uses the Random button, by which time the listings will have been extracted.
            // If, in the future, listings are used in a more time-critical manner, consider forcing a wait before
            // declaring the archive to be ready, by chaining the following callback in a .then() function of setListings.
            callbackReady(that);
        });
    };
    if (storage && !path) {
        var fileList = storage;
        // We need to convert the FileList into an Array
        var fileArray = [].slice.call(fileList);
        // The constructor has been called with an array of File/Blob parameter
        createZimfile(fileArray);
    } else {
        if (/.*zim..$/.test(path)) {
            // split archive
            that._searchArchiveParts(storage, path.slice(0, -2)).then(function (fileArray) {
                createZimfile(fileArray);
            }).catch(function (error) {
                callbackError('Error reading files in split archive ' + path + ': ' + error, 'Error reading archive files');
            });
        } else {
            storage.get(path).then(function (file) {
                createZimfile([file]);
            }).catch(function (error) {
                callbackError('Error reading ZIM file ' + path + ' : ' + error, 'Error reading archive file');
            });
        }
    }
}

/**
 * Searches the directory for all parts of a split archive.
 * @param {Storage} storage storage interface
 * @param {String} prefixPath path to the split files, missing the "aa" / "ab" / ... suffix.
 * @returns {Promise} that resolves to the array of file objects found.
 */
ZIMArchive.prototype._searchArchiveParts = function (storage, prefixPath) {
    var fileArray = [];
    var nextFile = function (part) {
        var suffix = String.fromCharCode(0x61 + Math.floor(part / 26)) + String.fromCharCode(0x61 + part % 26);
        return storage.get(prefixPath + suffix)
            .then(function (file) {
                fileArray.push(file);
                return nextFile(part + 1);
            }, function (error) {
                console.error('Error reading split archive file ' + prefixPath + suffix + ': ', error);
                return fileArray;
            });
    };
    return nextFile(0);
};

/**
 *
 * @returns {Boolean}
 */
ZIMArchive.prototype.isReady = function () {
    return this._file !== null;
};

/**
 * Detects whether the supplied archive is a Zimit-style archive or an OpenZIM archive and
 * sets a _file.zimType property accordingly; also returns the detected type. Extends ZIMFile.
 * @returns {String} Either 'zimit' for a Zimit archive, or 'open' for an OpenZIM archive
 */
ZIMArchive.prototype.setZimType = function () {
    var fileType = null;
    if (this.isReady()) {
        fileType = 'open';
        this._file.mimeTypes.forEach(function (v) {
            if (/warc-headers/i.test(v)) fileType = 'zimit';
        });
        this._file.zimType = fileType;
        console.debug('Archive type set to: ' + fileType);
    } else {
        console.error('ZIMArchive is not ready! Cannot set ZIM type.');
    }
    return fileType;
};

/**
 * Looks for the DirEntry of the main page
 * @param {callbackDirEntry} callback
 * @returns {Promise} that resolves to the DirEntry
 */
ZIMArchive.prototype.getMainPageDirEntry = function (callback) {
    if (this.isReady()) {
        var mainPageUrlIndex = this._file.mainPage;
        var that = this;
        this._file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) {
            // Filter out Zimit files that we cannot handle without error
            if (that._file.zimType === 'zimit') dirEntry = transformZimit.filterReplayFiles(dirEntry);
            callback(dirEntry);
        });
    }
};

/**
 *
 * @param {String} dirEntryId
 * @returns {DirEntry}
 */
ZIMArchive.prototype.parseDirEntryId = function (dirEntryId) {
    return zimDirEntry.DirEntry.fromStringId(this._file, dirEntryId);
};

/**
 * @callback callbackDirEntryList
 * @param {Array.<DirEntry>} dirEntryArray Array of DirEntries found
 */

/**
 * Look for DirEntries with title starting with the prefix of the current search object.
 * For now, ZIM titles are case sensitive.
 * So, as workaround, we try several variants of the prefix to find more results.
 * This should be enhanced when the ZIM format will be modified to store normalized titles
 * See https://phabricator.wikimedia.org/T108536
 *
 * @param {Object} search The current appstate.search object
 * @param {callbackDirEntryList} callback The function to call with the result
 * @param {Boolean} noInterim A flag to prevent callback until all results are ready (used in testing)
 */
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noInterim) {
    var that = this;
    // Establish array of initial values that must be searched first. All of these patterns are generated by the full
    // search type, and some by basic, but we need the most common patterns to be searched first, as it returns search
    // results much more quickly if we do this (and the user can click on a result before the rarer patterns complete)
    // NB duplicates are removed before processing search array
    var startArray = [];
    var cns = this.getContentNamespace();
    var dirEntries = [];
    search.scanCount = 0;
    // Check if user prefixed search with a namespace-like pattern. If so, do a search for namespace + url
    if (/^[-ABCHIJMUVWX]\//.test(search.prefix)) search.searchUrlIndex = true;
    // Regex below breaks the string into the pattern: group 1: alphanumericsearch; group 2: regex beginning with .* or .+, or contained in (?:regex)
    var isPrefixRegExp = search.prefix.match(/^((?:[^(.]|\((?!\?:)|\.(?![*+]))*)(\(\?:.*\)|\.[*+].*)$/);
    search.rgxPrefix = null;
    var prefix = search.prefix;
    // Launch a full-text search if possible
    if (LZ && !search.searchUrlIndex) that.findDirEntriesFromFullTextSearch(search, dirEntries).then(function (fullTextDirEntries) {
        // If user initiated a new search, cancel this one
        // In particular, do not set the search status back to 'complete'
        // as that would cause outdated results to unexpectedly pop up
        if (search.status === 'cancelled') return callback([], search);
        dirEntries = fullTextDirEntries;
        search.status = 'complete';
        callback(dirEntries, search);
    });
    if (isPrefixRegExp) {
        // User has initiated a regular expression search - note the only regexp special character allowed in the alphanumeric part is \s
        prefix = isPrefixRegExp[1].replace(/\\s/g, ' ');
        var regexCorrect = true;
        try {
            search.rgxPrefix = new RegExp(isPrefixRegExp[2], 'i');
        } catch (err) {
            // User has incorrect regular expression syntax
            regexCorrect = false;
        }
        if (!regexCorrect) {
            search.status = 'error';
            callback([], search);
            return;
        }
    }
    var prefixNameSpaces = '';
    if (search.searchUrlIndex) {
        var rgxSplitPrefix = /^[-ABCHIJMUVWX]\//;
        if (that._file.zimType === 'zimit' && cns === 'C') {
            // We have to account for the Zimit prefix in Type 1 ZIMs
            rgxSplitPrefix = /^(?:[CMWX]\/)?(?:[AH]\/)?/;
        }
        var splitPrefix = prefix.match(rgxSplitPrefix);
        prefixNameSpaces = splitPrefix ? splitPrefix[0] : '';
        var splitSuffix = prefix.split(rgxSplitPrefix);
        prefix = splitSuffix ? splitSuffix[1] : prefix;
    }
    // Ensure a search is done on the string exactly as typed
    startArray.push(prefix);
    // Normalize any spacing and make string all lowercase
    prefix = prefix.replace(/\s+/g, ' ').toLocaleLowerCase();
    // Add lowercase string with initial uppercase (this is a very common pattern)
    startArray.push(prefix.replace(/^./, function (m) {
        return m.toLocaleUpperCase();
    }));
    // Add pure lowercase string (rarer)
    startArray.push(prefix);
    // Add a case-insensitive search for the string (pseudo-regex notation)
    startArray.push('/' + prefix + '/i');
    // Get the full array of combinations to check number of combinations
    var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full'));
    // Put cap on exponential number of combinations (five words = 3^5 = 243 combinations)
    search.type = fullCombos.length < 300 ? 'full' : 'basic';
    // We have to remove duplicate string combinations because util.allCaseFirstLetters() can return some combinations
    // where uppercase and lowercase combinations are exactly the same, e.g. where prefix begins with punctuation
    // or currency signs, for languages without case, or where user-entered case duplicates calculated case
    var prefixVariants = util.removeDuplicateStringsInSmallArray(
        startArray.concat(
            // Get basic combinations first for speed of returning results
            util.allCaseFirstLetters(prefix).concat(
                search.type === 'full' ? fullCombos : []
            )
        )
    );
    function searchNextVariant () {
        // If user has initiated a new search, cancel this one
        if (search.status === 'cancelled') return callback([], search);
        var remaining = search.size - dirEntries.length;
        if (prefixVariants.length === 0 || remaining < 1) {
            // We have found all the title-search entries we are going to get, so indicate search type if we're still searching
            if (LZ && !search.searchUrlIndex && search.status !== 'complete') search.type = 'fulltext';
            else if (LZ && search.searchUrlIndex && remaining > 0) {
                search.type = 'fulltext';
                that.findDirEntriesFromFullTextSearch(search, dirEntries, remaining).then(function (fullTextDirEntries) {
                    if (search.status === 'cancelled') return callback([], search);
                    dirEntries = fullTextDirEntries;
                    search.status = 'complete';
                    callback(dirEntries, search);
                });
            }
            else search.status = 'complete';
            return callback(dirEntries, search);
        }
        // Dynamically populate list of articles
        search.status = 'interim';
        if (!noInterim) callback(dirEntries, search);
        search.found = dirEntries.length;
        var prefix = prefixNameSpaces + prefixVariants[0];
        search.lc = false;
        // If it's pseudo-regex with a case-insensitive flag like '/my search/i', do an enhanced case-insensitive search
        if (/^\/.+\/i$/.test(prefixVariants[0])) {
            search.lc = true;
            prefix = prefixNameSpaces + prefixVariants[0].replace(/^\/(.+)\/i/, '$1').toLocaleLowerCase();
            console.debug('Searching case-insensitively for: "' + prefix + '"');
        }
        // Remove in-progress search variant from array
        prefixVariants = prefixVariants.slice(1);
        // Search window sets an upper limit on how many matching dirEntries will be scanned in a full index search
        search.window = search.rgxPrefix ? 10000 * search.size : search.size;
        that.findDirEntriesWithPrefixCaseSensitive(prefix, search,
            function (newDirEntries, countReport, interim) {
                search.countReport = countReport;
                if (search.status === 'cancelled') return callback([], search);
                if (!noInterim && countReport === true) return callback(dirEntries, search);
                // Only push interim results to the dirEntries array (otherwise we get a duplicated array when the final results are reported to this function)
                if (interim) {
                    // Collect all the found paths for the dirEntries so far
                    var dirEntryPaths = [];
                    for (var i = 0; i < dirEntries.length; i++) {
                        dirEntryPaths.push(dirEntries[i].url);
                    }
                    // Push new directory entries to the end of the global array so long as they are not duplicates
                    for (var j = 0; j < newDirEntries.length; j++) {
                        if (~dirEntryPaths.indexOf(newDirEntries[j].url)) continue;
                        dirEntries.push(newDirEntries[j]);
                    }
                    search.found = dirEntries.length;
                    if (!noInterim && newDirEntries.length) return callback(dirEntries, search);
                } else return searchNextVariant();
            }
        );
    }
    searchNextVariant();
};

/**
 * A method to return the namespace in the ZIM file that contains the primary user content. In old-format ZIM files (minor
 * version 0) there are a number of content namespaces, but the primary one in which to search for titles is 'A'. In new-format
 * ZIMs (minor version 1) there is a single content namespace 'C'. See https://openzim.org/wiki/ZIM_file_format. This method
 * throws an error if it cannot determine the namespace or if the ZIM is not ready.
 * @returns {String} The content namespace for the ZIM archive
 */
ZIMArchive.prototype.getContentNamespace = function () {
    var errorText;
    if (this.isReady()) {
        var ver = this._file.minorVersion;
        // DEV: There are currently only two defined values for minorVersion in the OpenZIM specification
        // If this changes, adapt the error checking and return values
        if (ver > 1) {
            errorText = 'Unknown ZIM minor version!';
        } else {
            return ver === 0 ? 'A' : 'C';
        }
    } else {
        errorText = 'We could not determine the content namespace because the ZIM file is not ready!';
    }
    throw new Error(errorText);
};

/**
 * Look for dirEntries with title starting with the given prefix (case-sensitive)
 *
 * @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared
 * @param {Object} search The appstate.search object (for comparison, so that we can cancel long binary searches)
 * @param {callbackDirEntryList} callback The function to call with the array of dirEntries with titles that begin with prefix
 * @param {Integer} startIndex The index number with which to commence the search, or null
 */
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function (prefix, search, callback, startIndex) {
    // Save the value of startIndex because value of null has a special meaning in combination with prefix:
    // produces a list of matches starting with first match and then next x dirEntries thereafter
    var saveStartIndex = startIndex;
    startIndex = startIndex || 0;
    prefix = prefix || '';
    var cns = this.getContentNamespace();
    // Search v1 article listing if available, otherwise fallback to v0
    var articleCount = this._file.articleCount || this._file.entryCount;
    var searchFunction = appstate.selectedArchive._file.dirEntryByTitleIndex;
    if (search.searchUrlIndex) {
        articleCount = this._file.entryCount;
        searchFunction = appstate.selectedArchive._file.dirEntryByUrlIndex;
    }
    util.binarySearch(startIndex, articleCount, function(i) {
        return searchFunction(i).then(function(dirEntry) {
            if (search.status === 'cancelled') return 0;
            var ns = dirEntry.namespace;
            var ti = search.searchUrlIndex ? dirEntry.url : dirEntry.getTitleOrUrl();
            if (!search.searchUrlIndex) {
                // DEV: This search is redundant if we managed to populate articlePtrLst and articleCount, but it only takes two instructions and
                // provides maximum compatibility with rare ZIMs where attempts to find first and last article (in zimArchive.js) may have failed
                if (ns < cns) return 1;
                if (ns > cns) return -1;
                // We should now be in namespace A (old format ZIM) or C (new format ZIM)
                if (search.lc) { // Search comparator should be lowercase (for case-insensitive search)
                    ti = ti.toLocaleLowerCase();
                    prefix = prefix.toLocaleLowerCase();
                }
                return prefix <= ti ? -1 : 1;
            } else {
                if (search.lc) { // Search comparator should be lowercase (for case-insensitive search)
                    ns = ns + '/' + ti.replace(/^((?:[AH])?)\/?.*/, '$1');
                    ti = ti.replace(/^[AH]\//, '').toLocaleLowerCase();
                }
                // if (search.rgxPrefix && search.rgxPrefix.test(ti)) return -1;
                return prefix <= (ns + '/' + ti) ? -1 : 1;
            }
        });
    }, true).then(function (firstIndex) {
        var vDirEntries = [];
        var addDirEntries = function(index, lastTitle) {
            if (search.status === 'cancelled' || search.found >= search.size || index >= articleCount
            || lastTitle && !~lastTitle.indexOf(prefix) || index - firstIndex >= search.window) {
                // DEV: Diagnostics to be removed before merge
                if (vDirEntries.length) {
                    console.debug('Scanned ' + (index - firstIndex) + ' titles for "' + prefix +
                        '" (found ' + vDirEntries.length + ' match' + (vDirEntries.length === 1 ? ')' : 'es)'));
                }
                return {
                    dirEntries: vDirEntries,
                    nextStart: index
                };
            }
            return searchFunction(index).then(function (dirEntry) {
                search.scanCount++;
                var title = dirEntry.getTitleOrUrl();
                // If we are searching by URL, display namespace also
                if (search.searchUrlIndex) title = dirEntry.namespace + '/' + dirEntry.url;
                if (search.lc && !search.rgxPrefix) { // Search comparator should be lowercase if not using regex (for case-insensitive search)
                    var ns = title.replace(/^((?:C\/)?(?:[AH]\/)?).*/, '$1');
                    title = ns + title.replace(ns, '').toLocaleLowerCase();
                }
                // Only return dirEntries with titles that actually begin with prefix
                if (saveStartIndex === null || (search.searchUrlIndex || dirEntry.namespace === cns) && title.indexOf(prefix) === 0) {
                    if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title)) { // Regex test case-insensitive if i flag set
                        vDirEntries.push(dirEntry);
                        // Report interim result
                        if (typeof saveStartIndex === 'undefined') callback([dirEntry], false, true);
                    }
                }
                // Report number of titles scanned every 5000 titles
                if (!(search.scanCount % 5000) && typeof saveStartIndex === 'undefined') callback([], true, true);
                return addDirEntries(index + 1, title);
            });
        };
        return addDirEntries(firstIndex);
    }).then(function(objWithIndex) {
        return callback(objWithIndex.dirEntries, objWithIndex.nextStart);
    });
};

/**
 * Find Directory Entries corresponding to the requested search using Full Text search provided by libzim
 *
 * @param {Object} search The appstate.search object
 * @param {Array} dirEntries The array of already found Directory Entries
 * @param {Integer} number Optional positive number of search results requested (otherwise params.maxSearchResults will be used)
 * @returns {Promise<callbackDirEntry>} The augmented array of Directory Entries with titles that correspond to search
 */
ZIMArchive.prototype.findDirEntriesFromFullTextSearch = function (search, dirEntries, number) {
    var cns = this.getContentNamespace();
    var that = this;
    // We give ourselves an overhead in caclulating the results needed, because full-text search will return some results already found
    // var resultsNeeded = Math.floor(params.maxSearchResultsSize - dirEntries.length / 2);
    var resultsNeeded = number || params.maxSearchResultsSize;
    return this.callLibzimWorker({action: "search", text: search.prefix, numResults: resultsNeeded}).then(function (results) {
        if (results) {
            var dirEntryPaths = [];
            var fullTextPaths = [];
            // Collect all the found paths for the dirEntries
            for (var i = 0; i < dirEntries.length; i++) {
                dirEntryPaths.push(dirEntries[i].namespace + '/' + dirEntries[i].url);
            }
            // Collect all the paths for full text search, pruning as we go
            var path;
            for (var j = 0; j < results.entries.length; j++) {
                search.scanCount++;
                path = results.entries[j].path;
                // Full-text search result paths are missing the namespace in Type 1 ZIMs, so we add it back
                path = cns === 'C' ? cns + '/' + path : path;
                if (~dirEntryPaths.indexOf(path)) continue;
                fullTextPaths.push(path);
            }
            var promisesForDirEntries = [];
            for (var k = 0; k < fullTextPaths.length; k++) {
                promisesForDirEntries.push(that.getDirEntryByPath(fullTextPaths[k]));
            }
            return Promise.all(promisesForDirEntries).then(function (fullTextDirEntries) {
                for (var l = 0; l < fullTextDirEntries.length; l++) {
                    dirEntries.push(fullTextDirEntries[l]);
                }
                return dirEntries;
            });
        } else {
            return dirEntries;
        }
    });
};

/**
 * Calls the libzim Web Worker with the given parameters, and returns a Promise with its response
 *
 * @param {Object} parameters
 * @returns {Promise}
 */
ZIMArchive.prototype.callLibzimWorker = function (parameters) {
    return new Promise(function (resolve, reject) {
        console.debug('Calling libzim WebWorker with parameters', parameters);
        var tmpMessageChannel = new MessageChannel();
        // var t0 = performance.now();
        tmpMessageChannel.port1.onmessage = function (event) {
            // var t1 = performance.now();
            // var readTime = Math.round(t1 - t0);
            // console.debug("Response given by the WebWorker in " + readTime + " ms", event.data);
            resolve(event.data);
        };
        tmpMessageChannel.port1.onerror = function (event) {
            // var t1 = performance.now();
            // var readTime = Math.round(t1 - t0);
            // console.error("Error sent by the WebWorker in " + readTime + " ms", event.data);
            reject(event.data);
        };
        LZ.postMessage(parameters, [tmpMessageChannel.port2]);
    });
};

/**
 * @callback callbackDirEntry
 * @param {DirEntry} dirEntry The DirEntry found
 */

/**
 *
 * @param {DirEntry} dirEntry
 * @param {callbackDirEntry} callback
 */
ZIMArchive.prototype.resolveRedirect = function(dirEntry, callback) {
    var that = this;
    this._file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) {
        if (that._file.zimType === 'zimit') resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry);
        callback(resolvedDirEntry);
    });
};

/**
 * @callback callbackStringContent
 * @param {String} content String content
 */

/**
 *
 * @param {DirEntry} dirEntry
 * @param {callbackStringContent} callback
 */
ZIMArchive.prototype.readUtf8File = function(dirEntry, callback) {
    var cns = appstate.selectedArchive.getContentNamespace();
    return dirEntry.readData().then(function(data) {
        var mimetype = dirEntry.getMimetype();
        if (window.TextDecoder) {
            data = new TextDecoder('utf-8').decode(data);
        } else {
            // Support for IE11 and Edge Legacy - only support UTF-8 decoding
            data = utf8.parse(data);
        }
        if (/\bx?html\b/i.test(mimetype)) {
            // If the data were encoded with a different mimtype, here is how to change it
            // var encoding = decData.match(/<meta\b[^>]+?Content-Type[^>]+?charset=([^'"\s]+)/i);
            // encoding = encoding ? encoding[1] : '';
            // if (encoding && !/utf-8/i.test(encoding)) decData = new TextDecoder(encoding).decode(data);

            //Some Zimit assets have moved location and we need to follow the moved permanently data
            if (/301\s*moved\s+permanently/i.test(data)) dirEntry = transformZimit.getZimitRedirect(dirEntry, data, cns);

            // Some Zimit archives have an incorrect meta charset tag. See https://github.com/openzim/warc2zim/issues/88.
            // So we remove it!
            data = data.replace(/<meta\b[^>]+?Content-Type[^>]+?charset=([^'"\s]+)[^>]+>\s*/i, function (m0, m1) {
                if (!/utf-8/i.test(m1)) {
                    return '';
                }
                return m0;
            });
        }
        if (dirEntry.inspect || dirEntry.zimitRedirect) {
            if (dirEntry.inspect) dirEntry = transformZimit.getZimitRedirect(dirEntry, data, cns);
            if (dirEntry.zimitRedirect) {
                return appstate.selectedArchive.getDirEntryByPath(dirEntry.zimitRedirect).then(function (rd) {
                    return appstate.selectedArchive.readUtf8File(rd, callback);
                });
            }
       } else {
            // DEV: Note that we cannot terminate regex below with $ because there is a (rogue?) mimetype
            // of 'text/html;raw=true'
            if (params.zimType === 'zimit' && /\/(?:html|css|javascript)\b/i.test(mimetype)) {
                data = transformZimit.transformReplayUrls(dirEntry, data, mimetype);
            }
            callback(dirEntry, data);
        }
    }).catch(function (e) {
        console.error('Error reading directory entry', e);
        callback(dirEntry, '');
    });
};

/**
 * @callback callbackBinaryContent
 * @param {Uint8Array} content binary content
 */

/**
 * Read a binary file.
 * @param {DirEntry} dirEntry
 * @param {callbackBinaryContent} callback
 */
ZIMArchive.prototype.readBinaryFile = function(dirEntry, callback) {
    var that = this;
    return dirEntry.readData().then(function(data) {
        var mimetype = dirEntry.getMimetype();
        if (dirEntry.inspect) {
            dirEntry = transformZimit.getZimitRedirect(dirEntry, utf8.parse(data), appstate.selectedArchive.getContentNamespace());
            if (dirEntry.zimitRedirect) {
                return appstate.selectedArchive.getDirEntryByPath(dirEntry.zimitRedirect).then(function (rd) {
                    return appstate.selectedArchive.readBinaryFile(rd, callback);
                })
            }
        } else {
            // DEV: Note that we cannot terminate regex below with $ because there is a (rogue?) mimetype
            // of 'text/html;raw=true'
            if (params.zimType === 'zimit' && /\/(?:html|css|javascript)\b/i.test(mimetype)) {
                data = transformZimit.transformReplayUrls(dirEntry, utf8.parse(data), mimetype);
            }
            callback(dirEntry, data);
        }
    });
};

/**
 * Searches the URL pointer list of Directory Entries by pathname
 * @param {String} path The pathname of the DirEntry that is required (namespace + filename)
 * @param {Boolean} zimitResolving A flag to indicate that the a Zimit path is in a lookup loop
 * @param {String} originalPath Optional string used internally to prevent infinite loop
 * @return {Promise<DirEntry>} A Promise that resolves to a Directory Entry, or null if not found.
 */
ZIMArchive.prototype.getDirEntryByPath = function(path, zimitResolving, originalPath) {
    var that = this;
    if (originalPath) appstate.originalPath = originalPath;
    path = path.replace(/\?kiwix-display/, '');
    // Correct obvious errors
    if (!originalPath) {
        var revisedPath = path.replace(/.*?((?:C\/A|A)\/(?!.*(?:C\/A|A)).+)$/, '$1');
        if (revisedPath !== path) {
            console.warn('*** Revised path from ' + path + '\nto: ' + revisedPath + ' ***');
            if (appstate.selectedArchive._file.zimType === 'zimit') {
                console.debug('*** DEV: Consider correcting this error in tranformZimit.js ***');
            }
            path = revisedPath;
        }
    }
    return util.binarySearch(0, this._file.entryCount, function(i) {
        return that._file.dirEntryByUrlIndex(i).then(function(dirEntry) {
            var url = dirEntry.namespace + "/" + dirEntry.url;
            if (path < url) {
                return -1;
            } else if (path > url) {
                return 1;
            } else {
                return 0;
            }
        });
    }).then(function (index) {
        if (index === null) return null;
        return that._file.dirEntryByUrlIndex(index);
    }).then(function (dirEntry) {
        // Filter Zimit dirEntries and do somee initial transforms
        if (that._file.zimType === 'zimit')
            dirEntry = transformZimit.filterReplayFiles(dirEntry);
        if (!dirEntry) {
            // We couldn't get the dirEntry, so look it up the Zimit header
            if (!zimitResolving && that._file.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) {
                // We need to look the file up in the Header namespace (double replacement ensures both types of ZIM are supported)
                var oldPath = path;
                path = path.replace(/^A\//, 'H/').replace(/^(C\/)A\//, '$1H/');
                console.debug('DirEntry ' + oldPath + ' not found, looking up header: ' + path);
                return that.getDirEntryByPath(path, true, oldPath);
            // } else if (zimitResolving) {
            } else if (zimitResolving && appstate.originalPath && appstate.originalPath === appstate.expectedArticleURLToBeDisplayed) {
                // We couldn't find the Header, so try a fuzzy search only if the user is loading an article
                path = appstate.originalPath;
                var ns = path.replace(/^((?:C\/)?A\/).*/, '$1'); // If Zimit pseudo-namespaces are changed, will need to edit this
                path = path.replace(ns, '');
                path = path.toLocaleLowerCase(); // We are going to combine case-insensitive string comparison with regex matching
                var rgxPath = path.replace(/([-/?.$^|*+()[{])/g, '\\$1'); // Make sure we escape regex characters
                path = ns + path; // Add namespace back to path for full matching
                // path = ns;
                var search = {
                    rgxPrefix: new RegExp('.*' + rgxPath, 'i'),
                    searchUrlIndex: true,
                    lc: true, // Make the comparator (e.g. dirEntry.url) lowercase
                    size: 1,
                    found: 0
                }
                return fuzzySearch(path, search);
            } else {
                var newpath = path.replace(/^((?:A|C\/A)\/)[^/]+\/(.+)$/, '$1$2');
                if (newpath === path) return null; // No further paths to explore!
                console.log("Article " + path + " not available, but moving up one directory to compensate for ZIM coding error...");
                return that.getDirEntryByPath(newpath);
            }
        } else {
            // DEBUG: List found Directory Entry
            // if (dirEntry) console.debug('Found ' + path);
            return dirEntry;
        }
    });
};

/**
 * Initiate a fuzzy search for dirEntries matching the search object
 * @param {String} path Human-readable path to search for
 * @param {Object} search The search object
 * @returns {Promise<DirEntry>} A Promise that resolves to a Directory Entry, or null if not found
 */
function fuzzySearch(path, search) {
    return new Promise(function (resolve, reject) {
        console.log('Initiating fuzzy search for ' + path + '...');
        uiUtil.pollSpinner('Fuzzy search for ' + path + '...', true);
        var searchResolved = false;
        // setTimeout(function () {
        //     if (!searchResolved) uiUtil.pollSpinner('Fuzzy search for ' + path + '...', true);
        // }, 5000);
        appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(path, search, function (dirEntry) {
            if (!search.found && dirEntry && dirEntry[0] && dirEntry[0].url) {
                search.found++;
                dirEntry = dirEntry[0];
                dirEntry = transformZimit.filterReplayFiles(dirEntry);
                if (dirEntry) console.debug('Found ' + dirEntry.url + ' in fuzzy search');
                searchResolved = true;
                resolve(dirEntry);
            } else {
                console.debug('No fuzzy search results found');
                searchResolved = true;
                resolve(null);
            }
        }, null);
    });
}

/**
 *
 * @param {callbackDirEntry} callback
 */
ZIMArchive.prototype.getRandomDirEntry = function (callback) {
    // Prefer an article-only (v1) title pointer list, if available
    var articleCount = this._file.articleCount || this._file.entryCount;
    var index = Math.floor(Math.random() * articleCount);
    this._file.dirEntryByTitleIndex(index).then(callback);
};

/**
 * Read a Metadata string inside the ZIM file.
 * @param {String} key
 * @param {callbackMetadata} callback
 */
ZIMArchive.prototype.getMetadata = function (key, callback) {
    var that = this;
    this.getDirEntryByPath('M/' + key).then(function (dirEntry) {
        if (dirEntry === null || dirEntry === undefined) {
            console.warn('Title M/' + key + ' not found in the archive');
            callback();
        } else {
            that.readUtf8File(dirEntry, function (dirEntryRead, data) {
                callback(data);
            });
        }
    }).catch(function (e) {
        console.warn('Metadata with key ' + key + ' not found in the archive', e);
        callback();
    });
};

export default {
    ZIMArchive: ZIMArchive
};