kiwix-js-pwa/www/js/lib/zimArchive.js
Jaifroid cb15c17438 Adopt changes from Kiwix JS for MIME type map
Former-commit-id: f8f1f27226d73ebd6ad158d8ce99235e73fe11f1 [formerly 8f8c60e1f9aefada72a7fb81522f8599aef7d18d]
Former-commit-id: 8c1d6f14aa2d8cfb5dea16ee98a5274dc05a7353
2019-05-17 18:37:00 +01:00

325 lines
12 KiB
JavaScript

/**
* zimArchive.js: Support for archives in ZIM format.
*
* Copyright 2015 Mossroy and contributors
* License GPL v3:
*
* This file is part of Kiwix.
*
* Kiwix is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Kiwix is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
*/
'use strict';
define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
function(zimfile, zimDirEntry, util, utf8) {
/**
* ZIM Archive
*
*
* @typedef ZIMArchive
* @property {ZIMFile} _file The ZIM file (instance of ZIMFile, that might physically be split into several actual files)
* @property {String} _language Language of the content
*/
/**
* @callback callbackZIMArchive
* @param {ZIMArchive} zimArchive Ready-to-use ZIMArchive
*/
/**
* @callback callbackMetadata
* @param {String} data metadata string
*/
/**
 * Builds a ZIMArchive and asynchronously attaches the underlying ZIM file to it.
 *
 * Three calling modes are supported:
 *  - storage is truthy and path is falsy: storage is treated as an array-like
 *    list of File/Blob objects that together make up the archive;
 *  - path ends with "zim" followed by any two characters (e.g. "zimaa"):
 *    the archive is split; the two trailing characters are stripped and every
 *    part is looked up through _searchArchiveParts;
 *  - any other path: a single file is fetched with storage.get(path).
 *
 * Failures of the storage lookups are reported to the user with alert().
 *
 * @param {StorageFirefoxOS|Array.<Blob>} storage Storage (in this case, the path must be given) or Array of Files (path parameter must be omitted)
 * @param {String} path Path of the archive inside the storage
 * @param {callbackZIMArchive} callbackReady Called with this archive once its _file has been set
 */
function ZIMArchive(storage, path, callbackReady) {
    var archive = this;
    archive._file = null;
    archive._language = ""; //@TODO

    // Opens the given files as one ZIM file, then signals that the archive is ready
    function openFiles(files) {
        zimfile.fromFileArray(files).then(function (openedFile) {
            archive._file = openedFile;
            callbackReady(archive);
        });
    }

    if (storage && !path) {
        // Called with a list of File/Blob objects: convert the FileList into a real Array
        openFiles(Array.prototype.slice.call(storage));
        return;
    }

    if (/.*zim..$/.test(path)) {
        // Split archive: drop the two-letter part suffix and collect all the parts
        archive._searchArchiveParts(storage, path.slice(0, -2)).then(function (parts) {
            openFiles(parts);
        }, function (reason) {
            alert("Error reading files in split archive " + path + ": " + reason);
        });
        return;
    }

    // Plain, single-file archive
    storage.get(path).then(function (singleFile) {
        openFiles([singleFile]);
    }, function (reason) {
        alert("Error reading ZIM file " + path + " : " + reason);
    });
}
/**
 * Collects every part of a split archive.
 * Parts are requested one after the other with the two-letter suffixes
 * "aa", "ab", ..., "az", "ba", ... appended to prefixPath. The first lookup
 * that is rejected by the storage ends the search; that rejection is not
 * propagated.
 *
 * @param {Storage} storage storage interface
 * @param {String} prefixPath path to the split files, missing the "aa" / "ab" / ... suffix.
 * @returns {Promise} that resolves to the array of file objects found (possibly empty).
 */
ZIMArchive.prototype._searchArchiveParts = function(storage, prefixPath) {
    var CHAR_CODE_A = 0x61;
    var partsFound = [];

    function collectFrom(partNumber) {
        var firstLetter = String.fromCharCode(CHAR_CODE_A + Math.floor(partNumber / 26));
        var secondLetter = String.fromCharCode(CHAR_CODE_A + partNumber % 26);
        var onPartFound = function (partFile) {
            partsFound.push(partFile);
            return collectFrom(partNumber + 1);
        };
        // A missing part marks the end of the archive: hand back what we have so far
        var onPartMissing = function () {
            return partsFound;
        };
        return storage.get(prefixPath + firstLetter + secondLetter).then(onPartFound, onPartMissing);
    }

    return collectFrom(0);
};
/**
 * Tells whether the underlying ZIM file has been attached to this archive,
 * i.e. whether _file is no longer null.
 *
 * @returns {Boolean} true once _file has been set
 */
ZIMArchive.prototype.isReady = function() {
    var fileIsAttached = (this._file !== null);
    return fileIsAttached;
};
/**
 * Looks for the DirEntry of the main page.
 * Nothing happens (and callback is never called) if the archive is not ready.
 *
 * @param {callbackDirEntry} callback Called with the DirEntry of the main page
 * @returns {Promise|undefined} Promise that resolves to the DirEntry after callback
 *     has been called, or undefined if the archive is not ready
 */
ZIMArchive.prototype.getMainPageDirEntry = function(callback) {
    if (!this.isReady()) {
        return undefined;
    }
    var mainPageUrlIndex = this._file.mainPage;
    return this._file.dirEntryByUrlIndex(mainPageUrlIndex).then(function(dirEntry) {
        // A non-function callback was silently ignored by .then() before; keep that leniency
        if (typeof callback === 'function') {
            callback(dirEntry);
        }
        // Hand the DirEntry on so that callers can also consume the returned promise
        return dirEntry;
    });
};
/**
 * Converts a DirEntry string id into a DirEntry by delegating to
 * zimDirEntry.DirEntry.fromStringId, passing it this archive's file.
 *
 * @param {String} dirEntryId String id of the DirEntry
 * @returns {DirEntry} Whatever DirEntry.fromStringId returns for that id
 */
ZIMArchive.prototype.parseDirEntryId = function(dirEntryId) {
    var zimFile = this._file;
    var parsedDirEntry = zimDirEntry.DirEntry.fromStringId(zimFile, dirEntryId);
    return parsedDirEntry;
};
/**
* @callback callbackDirEntryList
* @param {Array.<DirEntry>} dirEntryArray Array of DirEntries found
*/
/**
 * Look for DirEntries with title starting with the given prefix.
 * For now, ZIM titles are case sensitive.
 * So, as workaround, several case variants of the prefix are tried in turn
 * (as given, first letter upper-cased, first letter lower-cased, first letter
 * of every word upper-cased; duplicates removed) until enough results have
 * been collected or no variant is left.
 * This should be enhanced when the ZIM format will be modified to store normalized titles
 * See https://phabricator.wikimedia.org/T108536
 *
 * @param {String} prefix Prefix typed by the user
 * @param {Integer} resultSize Number of results after which the search stops
 * @param {callbackDirEntryList} callback Called once with the accumulated array of DirEntries
 */
ZIMArchive.prototype.findDirEntriesWithPrefix = function(prefix, resultSize, callback) {
    var archive = this;
    var variants = util.removeDuplicateStringsInSmallArray([
        prefix,
        util.ucFirstLetter(prefix),
        util.lcFirstLetter(prefix),
        util.ucEveryFirstLetter(prefix)
    ]);
    var found = [];

    // Runs the case-sensitive search for one variant, then moves on to the next one
    var searchFromVariant = function (variantIndex) {
        var noVariantLeft = variantIndex >= variants.length;
        if (noVariantLeft || found.length >= resultSize) {
            callback(found);
            return;
        }
        var stillWanted = resultSize - found.length;
        archive.findDirEntriesWithPrefixCaseSensitive(variants[variantIndex], stillWanted, function (matches) {
            Array.prototype.push.apply(found, matches);
            searchFromVariant(variantIndex + 1);
        });
    };

    searchFromVariant(0);
};
/**
 * Look for dirEntries with title starting with the given prefix (case-sensitive).
 *
 * A binary search over the title index locates a first position, then the
 * resultSize positions starting there are read one by one. Entries that are
 * not in namespace "A" or that fail the prefix test are skipped but still
 * count towards resultSize, so fewer than resultSize entries may be returned.
 *
 * @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared;
 *     a falsy value is treated as the empty string
 * @param {Integer} resultSize The maximum number of results to return
 * @param {Function} callback Called with two arguments: the array of matching dirEntries, and the
 *     title index at which the scan stopped
 * @param {Integer|null} [startIndex] Lower bound of the binary search (0 when falsy). Passing exactly null
 *     additionally disables the prefix test, so that the entries following the first match are listed as well
 */
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, resultSize, callback, startIndex) {
    var archive = this;
    // Only an explicit null (not undefined, not 0) turns the prefix test off
    var listWithoutPrefixTest = (startIndex === null);
    var searchFrom = startIndex || 0;
    var wantedPrefix = prefix || '';

    // Comparison function handed to util.binarySearch: orders wantedPrefix relative to the entry at titleIndex
    function compareWithEntryAt(titleIndex) {
        return archive._file.dirEntryByTitleIndex(titleIndex).then(function (candidate) {
            if (candidate.namespace < "A") { return 1; }
            if (candidate.namespace > "A") { return -1; }
            // We should now be in namespace A
            return wantedPrefix <= candidate.getTitleOrUrl() ? -1 : 1;
        });
    }

    // NOTE(review): the trailing `true` presumably selects a lower-bound search - confirm against util.binarySearch
    util.binarySearch(searchFrom, archive._file.articleCount, compareWithEntryAt, true).then(function (firstIndex) {
        var matches = [];
        var scanLimit = firstIndex + resultSize;

        function scanFrom(titleIndex) {
            if (titleIndex >= scanLimit || titleIndex >= archive._file.articleCount) {
                return {
                    'dirEntries': matches,
                    'nextStart': titleIndex
                };
            }
            return archive._file.dirEntryByTitleIndex(titleIndex).then(function (candidate) {
                var title = candidate.getTitleOrUrl();
                var titleAccepted = listWithoutPrefixTest || title.indexOf(wantedPrefix) === 0;
                if (titleAccepted && candidate.namespace === "A") {
                    matches.push(candidate);
                }
                return scanFrom(titleIndex + 1);
            });
        }

        return scanFrom(firstIndex);
    }).then(function (scanResult) {
        callback(scanResult.dirEntries, scanResult.nextStart);
    });
};
/**
* @callback callbackDirEntry
* @param {DirEntry} dirEntry The DirEntry found
*/
/**
 * Follows a redirect: reads the DirEntry located at the URL index stored in
 * dirEntry.redirectTarget and passes it to callback.
 *
 * @param {DirEntry} dirEntry The redirecting DirEntry
 * @param {callbackDirEntry} callback Called with the DirEntry the redirect points to
 */
ZIMArchive.prototype.resolveRedirect = function(dirEntry, callback) {
    var zimFile = this._file;
    var pendingTarget = zimFile.dirEntryByUrlIndex(dirEntry.redirectTarget);
    pendingTarget.then(callback);
};
/**
* @callback callbackStringContent
* @param {DirEntry} dirEntry The DirEntry whose content was read
* @param {String} content String content
*/
/**
 * Reads the data of a DirEntry and decodes it as a UTF-8 string.
 *
 * @param {DirEntry} dirEntry The DirEntry whose data must be read
 * @param {callbackStringContent} callback Called as callback(dirEntry, content), where dirEntry is the
 *     entry passed in and content is the decoded string
 * @returns {Promise} Promise that is fulfilled once callback has been called, and rejected if
 *     reading, decoding or callback fails (same contract as readBinaryFile)
 */
ZIMArchive.prototype.readUtf8File = function(dirEntry, callback) {
    // Return the chain (as readBinaryFile does) so that callers can wait for it or handle failures
    return dirEntry.readData().then(function(data) {
        callback(dirEntry, utf8.parse(data));
    });
};
/**
* @callback callbackBinaryContent
* @param {DirEntry} dirEntry The DirEntry whose content was read
* @param {Uint8Array} content binary content
*/
/**
 * Reads the raw data of a DirEntry, without any decoding.
 *
 * @param {DirEntry} dirEntry The DirEntry whose data must be read
 * @param {callbackBinaryContent} callback Called as callback(dirEntry, data), where dirEntry is the
 *     entry passed in and data is what dirEntry.readData() resolved to
 * @returns {Promise} Promise that is fulfilled once callback has been called
 */
ZIMArchive.prototype.readBinaryFile = function(dirEntry, callback) {
    var forwardToCallback = function (rawData) {
        callback(dirEntry, rawData);
    };
    return dirEntry.readData().then(forwardToCallback);
};
/**
 * Searches a DirEntry (article / page) through the URL index.
 * Despite the name of the function, the search key is not a bare title: it is
 * compared against namespace + "/" + url of each entry, so it must have that
 * form (getMetadata, for instance, passes "M/" + key).
 *
 * @param {String} title Search key, in the form "<namespace>/<url>"
 * @return {Promise} resolving to the DirEntry object or null if not found.
 */
ZIMArchive.prototype.getDirEntryByTitle = function(title) {
    var that = this;
    return util.binarySearch(0, this._file.articleCount, function(i) {
        return that._file.dirEntryByUrlIndex(i).then(function(dirEntry) {
            var url = dirEntry.namespace + "/" + dirEntry.url;
            if (title < url) {
                return -1;
            }
            if (title > url) {
                return 1;
            }
            return 0;
        });
    }).then(function(index) {
        // binarySearch yields null when no entry compares equal to the key
        if (index === null) {
            return null;
        }
        return that._file.dirEntryByUrlIndex(index);
    });
};
/**
 * Picks a random URL index between 0 (included) and the file's articleCount
 * (excluded), reads the DirEntry stored at that index and passes it to callback.
 *
 * @param {callbackDirEntry} callback Called with the randomly chosen DirEntry
 */
ZIMArchive.prototype.getRandomDirEntry = function(callback) {
    var zimFile = this._file;
    var randomUrlIndex = Math.floor(Math.random() * zimFile.articleCount);
    var pendingDirEntry = zimFile.dirEntryByUrlIndex(randomUrlIndex);
    pendingDirEntry.then(callback);
};
/**
 * Read a Metadata string inside the ZIM file.
 * The entry is looked up under "M/" + key. When it cannot be found, or when
 * the lookup fails, a warning is logged and callback is called without any
 * argument.
 *
 * @param {String} key Name of the metadata entry
 * @param {callbackMetadata} callback Called with the metadata string, or with no argument if unavailable
 */
ZIMArchive.prototype.getMetadata = function (key, callback) {
    var archive = this;

    // Lookup succeeded: deliver the content, or report an absent entry
    function onLookupDone(dirEntry) {
        var entryExists = dirEntry !== null && dirEntry !== undefined;
        if (entryExists) {
            archive.readUtf8File(dirEntry, function (dirEntryRead, data) {
                callback(data);
            });
            return;
        }
        console.warn("Title M/" + key + " not found in the archive");
        callback();
    }

    // Lookup (or the handler above) failed
    function onLookupFailed(e) {
        console.warn("Metadata with key " + key + " not found in the archive", e);
        callback();
    }

    archive.getDirEntryByTitle("M/" + key).then(onLookupDone).fail(onLookupFailed);
};
/**
* Functions and classes exposed by this module.
* The object returned here is the return value of the callback passed to
* define(); its only member is the ZIMArchive constructor.
*/
return {
ZIMArchive: ZIMArchive
};
});