diff --git a/manifest.webapp b/manifest.webapp index 2468a5a5..ec3ccd85 100644 --- a/manifest.webapp +++ b/manifest.webapp @@ -2,12 +2,12 @@ "version": "1.1.0-SNAPSHOT", "name": "Evopedia", "description": "Offline Wikipedia Viewer", - "launch_path": "/index.html", + "launch_path": "/www/index.html", "icons": { - "16": "/img/icons/evopedia-16.png", - "48": "/img/icons/evopedia-48.png", - "60": "/img/icons/evopedia-60.png", - "128": "/img/icons/evopedia-128.png" + "16": "/www/img/icons/evopedia-16.png", + "48": "/www/img/icons/evopedia-48.png", + "60": "/www/img/icons/evopedia-60.png", + "128": "/www/img/icons/evopedia-128.png" }, "developer": { "name": "Mossroy ", diff --git a/tests.html b/tests.html index 52718b05..53b46410 100644 --- a/tests.html +++ b/tests.html @@ -33,7 +33,7 @@ http://requirejs.org/docs/api.html#jsfiles --> + src="www/js/lib/require.js"> diff --git a/tests/init.js b/tests/init.js index 31dd3525..3df25327 100644 --- a/tests/init.js +++ b/tests/init.js @@ -21,7 +21,7 @@ * along with Evopedia (file LICENSE-GPLv3.txt). If not, see */ require.config({ - baseUrl: 'js/lib', + baseUrl: 'www/js/lib', paths: { 'zepto': 'zepto', 'jquery': 'jquery-2.0.3', @@ -30,4 +30,4 @@ require.config({ } }); -requirejs(['../../tests/tests']); \ No newline at end of file +requirejs(['../../../tests/tests']); \ No newline at end of file diff --git a/www/js/lib/archive.js b/www/js/lib/archive.js index 5be8ecb5..55b586a4 100644 --- a/www/js/lib/archive.js +++ b/www/js/lib/archive.js @@ -1,909 +1,914 @@ -/** - * archive.js : Class for a local Evopedia archive, with the algorithms to read it - * This file handles finding a title in an archive, reading an article in an archive etc - * - * Copyright 2013 Mossroy - * License GPL v3: - * - * This file is part of Evopedia. - * - * Evopedia is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Evopedia is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Evopedia (file LICENSE-GPLv3.txt). If not, see - */ -define(function(require) { - - // Module dependencies - var normalize_string = require('normalize_string'); - var utf8 = require('utf8'); - var evopediaTitle = require('title'); - var util = require('util'); - var geometry = require('geometry'); - var jQuery = require('jquery'); - var titleIterators = require('titleIterators'); - - // Declare the webworker that can uncompress with bzip2 algorithm - var webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js"); - - // Size of chunks read in the dump files : 128 KB - var CHUNK_SIZE = 131072; - // The maximum number of titles that can have the same name after normalizing - // This is used by the algorithm that searches for a specific article by its name - var MAX_TITLES_WITH_SAME_NORMALIZED_NAME = 30; - // Maximum length of a title - // 300 bytes is arbitrary : we actually do not really know how long the titles will be - // But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough - var MAX_TITLE_LENGTH = 300; - // A rectangle representing all the earth globe - var GLOBE_RECTANGLE = new geometry.rect(-181, -90, 361, 181); - - /** - * LocalArchive class : defines a wikipedia dump on the filesystem - */ - function LocalArchive() { - this.dataFiles = new Array(); - this.coordinateFiles = new Array(); - this.titleFile = null; - this.mathIndexFile = null; - this.mathDataFile = null; - this.date = null; - this.language = null; - this.titleSearchFile = null; - this.normalizedTitles = true; - }; - - - /** - * Read the title Files in the given directory, and assign them to the - * current LocalArchive - * - * @param storage - * @param directory - */ - LocalArchive.prototype.readTitleFilesFromStorage = function(storage, directory) { - var currentLocalArchiveInstance = this; - storage.get(directory + 'titles.idx').then(function(file) { - currentLocalArchiveInstance.titleFile = file; - }, function(error) { - alert("Error reading title file in directory " + directory + " : " + error); - }); - storage.get(directory + 'titles_search.idx').then(function(file) { - currentLocalArchiveInstance.titleSearchFile = file; - }, function(error) { - // Do nothing : this file is not mandatory in an archive - }); - }; - - /** - * Read the data Files in the given directory (starting at given index), and - * assign them to the current LocalArchive - * - * @param storage - * @param directory - * @param index - */ - LocalArchive.prototype.readDataFilesFromStorage = function(storage, directory, index) { - var currentLocalArchiveInstance = this; - - var prefixedFileNumber = ""; - if (index < 10) { - prefixedFileNumber = "0" + index; - } else { - prefixedFileNumber = index; - } - storage.get(directory + 'wikipedia_' + prefixedFileNumber + '.dat') - .then(function(file) { - currentLocalArchiveInstance.dataFiles[index] = file; - currentLocalArchiveInstance.readDataFilesFromStorage(storage, directory, - index + 1); - }, function(error) { - // TODO there must be a better to way to detect a FileNotFound - // if (error != "NotFoundError") { - // alert("Error reading data file " + index + " in directory " - // + directory + " : " + error); - // } - }); - }; - - /** - * Read the coordinate Files in the given directory (starting at given index), and - * assign them to the current LocalArchive - * - * @param storage - * @param directory - * @param index - */ - LocalArchive.prototype.readCoordinateFilesFromStorage = function(storage, directory, index) { - var currentLocalArchiveInstance = this; - - var prefixedFileNumber = ""; - if (index < 10) { - prefixedFileNumber = "0" + index; - } else { - prefixedFileNumber = index; - } - storage.get(directory + 'coordinates_' + prefixedFileNumber - + '.idx').then(function(file) { - currentLocalArchiveInstance.coordinateFiles[index] = file; - currentLocalArchiveInstance.readCoordinateFilesFromStorage(storage, directory, - index + 1); - }, function(error) { - // TODO there must be a better to way to detect a FileNotFound - // if (error != "NotFoundError") { - // alert("Error reading coordinates file " + index + " in directory " - // + directory + " : " + error); - // } - }); - }; - - /** - * Read the metadata.txt file in the given directory, and store its content - * in the current instance - * - * @param storage - * @param directory - */ - LocalArchive.prototype.readMetadataFileFromStorage = function(storage, directory) { - var currentLocalArchiveInstance = this; - - storage.get(directory + 'metadata.txt').then(function(file) { - var metadataFile = file; - currentLocalArchiveInstance.readMetadataFile(metadataFile); - }, function(error) { - alert("Error reading metadata.txt file in directory " - + directory + " : " + error); - }); - }; - - /** - * Read the metadata file, in order to populate its values in the current - * instance - * @param {File} file metadata.txt file - */ - LocalArchive.prototype.readMetadataFile = function(file) { - var currentLocalArchiveInstance = this; - var reader = new FileReader(); - reader.onload = function(e) { - var metadata = e.target.result; - currentLocalArchiveInstance.language = /\nlanguage ?\= ?([^ \n]+)/.exec(metadata)[1]; - currentLocalArchiveInstance.date = /\ndate ?\= ?([^ \n]+)/.exec(metadata)[1]; - var normalizedTitlesRegex = /\nnormalized_titles ?\= ?([^ \n]+)/; - if (normalizedTitlesRegex.exec(metadata)) { - var normalizedTitlesInt = normalizedTitlesRegex.exec(metadata)[1]; - if (normalizedTitlesInt === "0") { - currentLocalArchiveInstance.normalizedTitles = false; - } - else { - currentLocalArchiveInstance.normalizedTitles = true; - } - } - else { - currentLocalArchiveInstance.normalizedTitles = true; - } - }; - reader.readAsText(file); - }; - - /** - * Initialize the localArchive from given archive files - * @param {type} archiveFiles - */ - LocalArchive.prototype.initializeFromArchiveFiles = function(archiveFiles) { - var dataFileRegex = /^wikipedia_(\d\d).dat$/; - var coordinateFileRegex = /^coordinates_(\d\d).idx$/; - this.dataFiles = new Array(); - this.coordinateFiles = new Array(); - for (var i=0; i 0) { - var intFileNr = 1 * coordinateFileNr[1]; - this.coordinateFiles[intFileNr - 1] = file; - } - else { - var dataFileNr = dataFileRegex.exec(file.name); - if (dataFileNr && dataFileNr.length > 0) { - var intFileNr = 1 * dataFileNr[1]; - this.dataFiles[intFileNr] = file; - } - } - } - } - } - - }; - - /** - * Initialize the localArchive from given directory, using DeviceStorage - * @param {type} storages List of DeviceStorages available - * @param {type} archiveDirectory - */ - LocalArchive.prototype.initializeFromDeviceStorage = function(storages, archiveDirectory) { - // First, we have to find which DeviceStorage has been selected by the user - // It is the prefix of the archive directory - var storageNameRegex = /^\/([^\/]+)\//; - var regexResults = storageNameRegex.exec(archiveDirectory); - var selectedStorage = null; - if (regexResults && regexResults.length>0) { - var selectedStorageName = regexResults[1]; - for (var i=0; i= titleCount) { - return titles; - } - return iterator.advance().then(function(title) { - if (title == null) - return titles; - titles.push(title); - return addNext(); - }); - } - return addNext(); - }).then(callbackFunction, errorHandler); - }; - - /** - * Look for a title by its name, and call the callbackFunction with this Title - * If the title is not found, the callbackFunction is called with parameter null - * @param titleName - * @param callbackFunction - */ - LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) { - var that = this; - var normalize = this.getNormalizeFunction(); - var normalizedTitleName = normalize(titleName); - - titleIterators.FindPrefixOffset(this.titleFile, titleName, normalize).then(function(offset) { - var iterator = new titleIterators.SequentialTitleIterator(that, offset); - function check(title) { - if (title == null || normalize(title.name) !== normalizedTitleName) { - return null; - } else if (title.name === titleName) { - return title; - } else { - return iterator.advance().then(check); - } - } - return iterator.advance().then(check); - }).then(callbackFunction, errorHandler); - }; - - /** - * Get a random title, and call the callbackFunction with this Title - * @param callbackFunction - */ - LocalArchive.prototype.getRandomTitle = function(callbackFunction) { - var that = this; - var offset = Math.floor(Math.random() * this.titleFile.size); - jQuery.when().then(function() { - var iterator = new titleIterators.SequentialTitleIterator(that, offset); - // call advance twice because we are probably not at the beginning - // of a title - return iterator.advance().then(function() { - return iterator.advance(); - }); - }).then(callbackFunction, errorHandler); - }; - - /** - * Find titles that start with the given prefix, and call the callbackFunction with this list of Titles - * @param prefix - * @param maxSize Maximum number of titles to read - * @param callbackFunction - */ - LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) { - var that = this; - var titles = []; - var normalize = this.getNormalizeFunction(); - prefix = normalize(prefix); - - titleIterators.FindPrefixOffset(this.titleFile, prefix, normalize).then(function(offset) { - var iterator = new titleIterators.SequentialTitleIterator(that, offset); - function addNext() { - if (titles.length >= maxSize) { - return titles; - } - return iterator.advance().then(function(title) { - if (title == null) - return titles; - // check whether this title really starts with the prefix - var name = normalize(title.name); - if (name.length < prefix.length || name.substring(0, prefix.length) != prefix) - return titles; - titles.push(title); - return addNext(); - }); - } - return addNext(); - }).then(callbackFunction, errorHandler); - }; - - - /** - * Read an article from the title instance, and call the - * callbackFunction with the article HTML String - * - * @param title - * @param callbackFunction - */ - LocalArchive.prototype.readArticle = function(title, callbackFunction) { - var dataFile = null; - - var prefixedFileNumber = ""; - if (title.fileNr < 10) { - prefixedFileNumber = "0" + title.fileNr; - } else { - prefixedFileNumber = title.fileNr; - } - var expectedFileName = "wikipedia_" + prefixedFileNumber + ".dat"; - - // Find the good dump file - for (var i = 0; i < this.dataFiles.length; i++) { - var fileName = this.dataFiles[i].name; - // Check if the fileName ends with the expected file name (in case - // of DeviceStorage usage, the fileName is prefixed by the - // directory) - if (fileName.match(expectedFileName + "$") == expectedFileName) { - dataFile = this.dataFiles[i]; - } - } - if (!dataFile) { - // TODO can probably be replaced by some error handler at window level - alert("Oops : some files seem to be missing in your archive. Please report this problem to us by email (see About section), with the names of the archive and article, and the following info : " - + "File number " + title.fileNr + " not found"); - throw new Error("File number " + title.fileNr + " not found"); - } else { - var reader = new FileReader(); - // Read the article in the dataFile, starting with a chunk of CHUNK_SIZE - this.readArticleChunk(title, dataFile, reader, CHUNK_SIZE, callbackFunction); - } - - }; - - /** - * Read a chunk of the dataFile (of the given length) to try to read the - * given article. - * If the bzip2 algorithm works and articleLength of the article is reached, - * call the callbackFunction with the article HTML String. - * Else, recursively call this function with readLength + CHUNK_SIZE - * - * @param title - * @param dataFile - * @param reader - * @param readLength - * @param callbackFunction - */ - LocalArchive.prototype.readArticleChunk = function(title, dataFile, reader, - readLength, callbackFunction) { - var currentLocalArchiveInstance = this; - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Data file read cancelled'); - }; - reader.onload = function(e) { - try { - var compressedArticles = e.target.result; - webworkerBzip2.onerror = function(event){ - // TODO can probably be replaced by some error handler at window level - alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" - + event.message + " filename=" + event.filename + " line number=" + event.lineno ); - throw new Error("Error during bzip2 decompression : " + event.message + " (" + event.filename + ":" + event.lineno + ")"); - }; - webworkerBzip2.onmessage = function(event){ - switch (event.data.cmd){ - case "result": - var htmlArticles = event.data.msg; - // Start reading at offset, and keep length characters - var htmlArticle = htmlArticles.substring(title.blockOffset, - title.blockOffset + title.articleLength); - if (htmlArticle.length >= title.articleLength) { - // Keep only length characters - htmlArticle = htmlArticle.substring(0, title.articleLength); - // Decode UTF-8 encoding - htmlArticle = decodeURIComponent(escape(htmlArticle)); - callbackFunction(title, htmlArticle); - } else { - // TODO : throw exception if we reach the end of the file - currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, - callbackFunction); - } - break; - case "recurse": - currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, callbackFunction); - break; - case "debug": - console.log(event.data.msg); - break; - case "error": - // TODO can probably be replaced by some error handler at window level - alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" - + event.data.msg ); - throw new Error("Error during bzip2 decompression : " + event.data.msg); - break; - } - }; - webworkerBzip2.postMessage({cmd : 'uncompress', msg : - new Uint8Array(compressedArticles)}); - - } - catch (e) { - callbackFunction("Error : " + e); - } - }; - var blob = dataFile.slice(title.blockStart, title.blockStart - + readLength); - - // Read in the image file as a binary string. - reader.readAsArrayBuffer(blob); - }; - - /** - * Load the math image specified by the hex string and call the - * callbackFunction with a base64 encoding of its data. - * - * @param hexString - * @param callbackFunction - */ - LocalArchive.prototype.loadMathImage = function(hexString, callbackFunction) { - var entrySize = 16 + 4 + 4; - var lo = 0; - var hi = this.mathIndexFile.size / entrySize; - - var mathDataFile = this.mathDataFile; - - this.findMathDataPosition(hexString, lo, hi, function(pos, length) { - var reader = new FileReader(); - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Math image file read cancelled'); - }; - var blob = mathDataFile.slice(pos, pos + length); - reader.onload = function(e) { - var byteArray = new Uint8Array(e.target.result); - callbackFunction(util.uint8ArrayToBase64(byteArray)); - }; - reader.readAsArrayBuffer(blob); - }); - }; - - - /** - * Recursive algorithm to find the position of the Math image in the data file - * @param {type} hexString - * @param {type} lo - * @param {type} hi - * @param {type} callbackFunction - */ - LocalArchive.prototype.findMathDataPosition = function(hexString, lo, hi, callbackFunction) { - var entrySize = 16 + 4 + 4; - if (lo >= hi) { - /* TODO error - not found */ - return; - } - var reader = new FileReader(); - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Math image file read cancelled'); - }; - var mid = Math.floor((lo + hi) / 2); - var blob = this.mathIndexFile.slice(mid * entrySize, (mid + 1) * entrySize); - var currentLocalArchiveInstance = this; - reader.onload = function(e) { - var byteArray = new Uint8Array(e.target.result); - var hash = util.uint8ArrayToHex(byteArray.subarray(0, 16)); - if (hash == hexString) { - var pos = util.readIntegerFrom4Bytes(byteArray, 16); - var length = util.readIntegerFrom4Bytes(byteArray, 16 + 4); - callbackFunction(pos, length); - return; - } else if (hexString < hash) { - hi = mid; - } else { - lo = mid + 1; - } - - currentLocalArchiveInstance.findMathDataPosition(hexString, lo, hi, callbackFunction); - }; - // Read the file as a binary string - reader.readAsArrayBuffer(blob); - }; - - - /** - * Resolve the redirect of the given title instance, and call the callbackFunction with the redirected Title instance - * @param title - * @param callbackFunction - */ - LocalArchive.prototype.resolveRedirect = function(title, callbackFunction) { - var reader = new FileReader(); - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Title file read cancelled'); - }; - reader.onload = function(e) { - var binaryTitleFile = e.target.result; - var byteArray = new Uint8Array(binaryTitleFile); - - if (byteArray.length === 0) { - // TODO can probably be replaced by some error handler at window level - alert("Oops : there seems to be something wrong in your archive. Please report it to us by email or through Github (see About section), with the names of the archive and article and the following info : " - + "Unable to find redirected article for title " + title.name + " : offset " + title.blockStart + " not found in title file"); - throw new Error("Unable to find redirected article for title " + title.name + " : offset " + title.blockStart + " not found in title file"); - } - - var redirectedTitle = title; - redirectedTitle.fileNr = 1 * byteArray[2]; - redirectedTitle.blockStart = util.readIntegerFrom4Bytes(byteArray, 3); - redirectedTitle.blockOffset = util.readIntegerFrom4Bytes(byteArray, 7); - redirectedTitle.articleLength = util.readIntegerFrom4Bytes(byteArray, 11); - - callbackFunction(redirectedTitle); - }; - // Read only the 16 necessary bytes, starting at title.blockStart - var blob = this.titleFile.slice(title.blockStart, title.blockStart + 16); - // Read in the file as a binary string - reader.readAsArrayBuffer(blob); - }; - - /** - * Finds titles that are located inside the given rectangle - * This is the main function, that has to be called from the application - * - * @param {type} rect Rectangle where to look for titles - * @param {type} maxTitles Maximum number of titles to find - * @param callbackFunction Function to call with the list of titles found - */ - LocalArchive.prototype.getTitlesInCoords = function(rect, maxTitles, callbackFunction) { - var normalizedRectangle = rect.normalized(); - var i = 0; - LocalArchive.getTitlesInCoordsInt(this, i, 0, normalizedRectangle, GLOBE_RECTANGLE, maxTitles, new Array(), callbackFunction, LocalArchive.callbackGetTitlesInCoordsInt); - }; - - /** - * Callback function called by getTitlesInCoordsInt (or by itself), in order - * to loop through every coordinate file, and search titles nearby in each - * of them. - * When all the coordinate files are searched, or when enough titles are - * found, another function is called to convert the title positions found - * into Title instances (asynchronously) - * - * @param {type} localArchive - * @param {type} titlePositionsFound - * @param {type} i : index of the coordinate file - * @param {type} maxTitles - * @param {type} normalizedRectangle - * @param {type} callbackFunction - */ - LocalArchive.callbackGetTitlesInCoordsInt = function(localArchive, titlePositionsFound, i, maxTitles, normalizedRectangle, callbackFunction) { - i++; - if (titlePositionsFound.length < maxTitles && i < localArchive.coordinateFiles.length) { - LocalArchive.getTitlesInCoordsInt(localArchive, i, 0, normalizedRectangle, GLOBE_RECTANGLE, maxTitles, titlePositionsFound, callbackFunction, LocalArchive.callbackGetTitlesInCoordsInt); - } - else { - // Search is over : now let's convert the title positions into Title instances - if (titlePositionsFound && titlePositionsFound.length > 0) { - // TODO find out why there are duplicates, and why the maxTitles is not respected - // The statement below removes duplicates and limits its size - // (not correctly because based on indexes of the original array, instead of target array) - // This should be removed when the cause is found - var filteredTitlePositions = titlePositionsFound.filter(function (e, i, arr) { - return arr.lastIndexOf(e) === i && i<=maxTitles; - }); - LocalArchive.readTitlesFromTitleCoordsInTitleFile(localArchive, filteredTitlePositions, 0, new Array(), callbackFunction); - } - else { - callbackFunction(titlePositionsFound); - } - } - }; - - /** - * This function reads a list of title positions, and converts it into a list or Title instances. - * It handles index i, then recursively calls itself for index i+1 - * When all the list is processed, the callbackFunction is called with the Title list - * - * @param {type} localArchive - * @param {type} titlePositionsFound - * @param {type} i - * @param {type} titlesFound - * @param {type} callbackFunction - */ - LocalArchive.readTitlesFromTitleCoordsInTitleFile = function (localArchive, titlePositionsFound, i, titlesFound, callbackFunction) { - var titleOffset = titlePositionsFound[i]; - localArchive.getTitlesStartingAtOffset(titleOffset, 1, function(titleList) { - if (titleList && titleList.length === 1) { - titlesFound.push(titleList[0]); - i++; - if (i= 0 && titlePositionsFound.length >= maxTitles) { - callbackGetTitlesInCoordsInt(localArchive, titlePositionsFound, coordinateFileIndex, maxTitles, targetRect, callbackFunction); - return; - } - } - callbackGetTitlesInCoordsInt(localArchive, titlePositionsFound, coordinateFileIndex, maxTitles, targetRect, callbackFunction); - } - - }; - // Read 22 bytes in the coordinate files, at coordFilePos index, in order to read the selector and the coordinates - // 2 + 4 + 4 + 3 * 4 = 22 - // As there can be up to 65535 different coordinates, we have to read 22*65535 bytes = 1.44MB - // TODO : This should be improved by reading the file in 2 steps : - // - first read the selector - // - then read the coordinates (reading only the exact necessary bytes) - var blob = localArchive.coordinateFiles[coordinateFileIndex].slice(coordFilePos, coordFilePos + 22*65535); - - // Read in the file as a binary string - reader.readAsArrayBuffer(blob); - }; - - /** - *  Scans the DeviceStorage for archives - * - * @param storages List of DeviceStorage instances - * @param callbackFunction Function to call with the list of directories where archives are found - */ - LocalArchive.scanForArchives = function(storages, callbackFunction) { - var directories = []; - var promises = jQuery.map(storages, function(storage) { - return storage.scanForDirectoriesContainingFile('titles.idx') - .then(function(dirs) { - jQuery.merge(directories, dirs); - return true - }); - }); - jQuery.when.apply(null, promises).then(function() { - callbackFunction(directories); - }, function(error) { - alert("Error scanning your SD card : " + error - + ". If you're using the Firefox OS Simulator, please put the archives in " - + "a 'fake-sdcard' directory inside your Firefox profile " - + "(ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/" - + "profile/fake-sdcard/wikipedia_small_2010-08-14)"); - callbackFunction(null); - }); - }; - - /** - * Normalize the given String, if the current Archive is compatible. - * If it's not, return the given String, as is. - * @param string : string to normalized - * @returns normalized string, or same string if archive is not compatible - */ - LocalArchive.prototype.normalizeStringIfCompatibleArchive = function(string) { - if (this.normalizedTitles === true) { - return normalize_string.normalizeString(string); - } - else { - return string; - } - }; - - /** - * Returns a function that normalizes strings if the current archive is compatible. - * If it is not, returns the identity function. - */ - LocalArchive.prototype.getNormalizeFunction = function() { - if (this.normalizedTitles === true) { - return normalize_string.normalizeString; - } else { - return function(string) { return string; } - } - }; - - /** - * ErrorHandler for FileReader - * @param {type} evt - * @returns {undefined} - */ - function errorHandler(evt) { - switch (evt.target.error.code) { - case evt.target.error.NOT_FOUND_ERR: - alert('File Not Found!'); - break; - case evt.target.error.NOT_READABLE_ERR: - alert('File is not readable'); - break; - case evt.target.error.ABORT_ERR: - break; // noop - default: - alert('An error occurred reading this file.'); - }; - } - - - /** - * Functions and classes exposed by this module - */ - return { - LocalArchive: LocalArchive - }; -}); +/** + * archive.js : Class for a local Evopedia archive, with the algorithms to read it + * This file handles finding a title in an archive, reading an article in an archive etc + * + * Copyright 2013 Mossroy + * License GPL v3: + * + * This file is part of Evopedia. + * + * Evopedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Evopedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Evopedia (file LICENSE-GPLv3.txt). If not, see + */ +define(function(require) { + + // Module dependencies + var normalize_string = require('normalize_string'); + var utf8 = require('utf8'); + var evopediaTitle = require('title'); + var util = require('util'); + var geometry = require('geometry'); + var jQuery = require('jquery'); + var titleIterators = require('titleIterators'); + + // Declare the webworker that can uncompress with bzip2 algorithm + var webworkerBzip2; + try { + // When using the application normally + webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js"); + } + catch(e) { + // When using unit tests + webworkerBzip2 = new Worker("www/js/lib/webworker_bzip2.js"); + } + + // Size of chunks read in the dump files : 128 KB + var CHUNK_SIZE = 131072; + // The maximum number of titles that can have the same name after normalizing + // This is used by the algorithm that searches for a specific article by its name + var MAX_TITLES_WITH_SAME_NORMALIZED_NAME = 30; + // A rectangle representing all the earth globe + var GLOBE_RECTANGLE = new geometry.rect(-181, -90, 361, 181); + + /** + * LocalArchive class : defines a wikipedia dump on the filesystem + */ + function LocalArchive() { + this.dataFiles = new Array(); + this.coordinateFiles = new Array(); + this.titleFile = null; + this.mathIndexFile = null; + this.mathDataFile = null; + this.date = null; + this.language = null; + this.titleSearchFile = null; + this.normalizedTitles = true; + }; + + + /** + * Read the title Files in the given directory, and assign them to the + * current LocalArchive + * + * @param storage + * @param directory + */ + LocalArchive.prototype.readTitleFilesFromStorage = function(storage, directory) { + var currentLocalArchiveInstance = this; + storage.get(directory + 'titles.idx').then(function(file) { + currentLocalArchiveInstance.titleFile = file; + }, function(error) { + alert("Error reading title file in directory " + directory + " : " + error); + }); + storage.get(directory + 'titles_search.idx').then(function(file) { + currentLocalArchiveInstance.titleSearchFile = file; + }, function(error) { + // Do nothing : this file is not mandatory in an archive + }); + }; + + /** + * Read the data Files in the given directory (starting at given index), and + * assign them to the current LocalArchive + * + * @param storage + * @param directory + * @param index + */ + LocalArchive.prototype.readDataFilesFromStorage = function(storage, directory, index) { + var currentLocalArchiveInstance = this; + + var prefixedFileNumber = ""; + if (index < 10) { + prefixedFileNumber = "0" + index; + } else { + prefixedFileNumber = index; + } + storage.get(directory + 'wikipedia_' + prefixedFileNumber + '.dat') + .then(function(file) { + currentLocalArchiveInstance.dataFiles[index] = file; + currentLocalArchiveInstance.readDataFilesFromStorage(storage, directory, + index + 1); + }, function(error) { + // TODO there must be a better to way to detect a FileNotFound + // if (error != "NotFoundError") { + // alert("Error reading data file " + index + " in directory " + // + directory + " : " + error); + // } + }); + }; + + /** + * Read the coordinate Files in the given directory (starting at given index), and + * assign them to the current LocalArchive + * + * @param storage + * @param directory + * @param index + */ + LocalArchive.prototype.readCoordinateFilesFromStorage = function(storage, directory, index) { + var currentLocalArchiveInstance = this; + + var prefixedFileNumber = ""; + if (index < 10) { + prefixedFileNumber = "0" + index; + } else { + prefixedFileNumber = index; + } + storage.get(directory + 'coordinates_' + prefixedFileNumber + + '.idx').then(function(file) { + currentLocalArchiveInstance.coordinateFiles[index] = file; + currentLocalArchiveInstance.readCoordinateFilesFromStorage(storage, directory, + index + 1); + }, function(error) { + // TODO there must be a better to way to detect a FileNotFound + // if (error != "NotFoundError") { + // alert("Error reading coordinates file " + index + " in directory " + // + directory + " : " + error); + // } + }); + }; + + /** + * Read the metadata.txt file in the given directory, and store its content + * in the current instance + * + * @param storage + * @param directory + */ + LocalArchive.prototype.readMetadataFileFromStorage = function(storage, directory) { + var currentLocalArchiveInstance = this; + + storage.get(directory + 'metadata.txt').then(function(file) { + var metadataFile = file; + currentLocalArchiveInstance.readMetadataFile(metadataFile); + }, function(error) { + alert("Error reading metadata.txt file in directory " + + directory + " : " + error); + }); + }; + + /** + * Read the metadata file, in order to populate its values in the current + * instance + * @param {File} file metadata.txt file + */ + LocalArchive.prototype.readMetadataFile = function(file) { + var currentLocalArchiveInstance = this; + var reader = new FileReader(); + reader.onload = function(e) { + var metadata = e.target.result; + currentLocalArchiveInstance.language = /\nlanguage ?\= ?([^ \n]+)/.exec(metadata)[1]; + currentLocalArchiveInstance.date = /\ndate ?\= ?([^ \n]+)/.exec(metadata)[1]; + var normalizedTitlesRegex = /\nnormalized_titles ?\= ?([^ \n]+)/; + if (normalizedTitlesRegex.exec(metadata)) { + var normalizedTitlesInt = normalizedTitlesRegex.exec(metadata)[1]; + if (normalizedTitlesInt === "0") { + currentLocalArchiveInstance.normalizedTitles = false; + } + else { + currentLocalArchiveInstance.normalizedTitles = true; + } + } + else { + currentLocalArchiveInstance.normalizedTitles = true; + } + }; + reader.readAsText(file); + }; + + /** + * Initialize the localArchive from given archive files + * @param {type} archiveFiles + */ + LocalArchive.prototype.initializeFromArchiveFiles = function(archiveFiles) { + var dataFileRegex = /^wikipedia_(\d\d).dat$/; + var coordinateFileRegex = /^coordinates_(\d\d).idx$/; + this.dataFiles = new Array(); + this.coordinateFiles = new Array(); + for (var i=0; i 0) { + var intFileNr = 1 * coordinateFileNr[1]; + this.coordinateFiles[intFileNr - 1] = file; + } + else { + var dataFileNr = dataFileRegex.exec(file.name); + if (dataFileNr && dataFileNr.length > 0) { + var intFileNr = 1 * dataFileNr[1]; + this.dataFiles[intFileNr] = file; + } + } + } + } + } + + }; + + /** + * Initialize the localArchive from given directory, using DeviceStorage + * @param {type} storages List of DeviceStorages available + * @param {type} archiveDirectory + */ + LocalArchive.prototype.initializeFromDeviceStorage = function(storages, archiveDirectory) { + // First, we have to find which DeviceStorage has been selected by the user + // It is the prefix of the archive directory + var storageNameRegex = /^\/([^\/]+)\//; + var regexResults = storageNameRegex.exec(archiveDirectory); + var selectedStorage = null; + if (regexResults && regexResults.length>0) { + var selectedStorageName = regexResults[1]; + for (var i=0; i= titleCount) { + return titles; + } + return iterator.advance().then(function(title) { + if (title === null) + return titles; + titles.push(title); + return addNext(); + }); + } + return addNext(); + }).then(callbackFunction, errorHandler); + }; + + /** + * Look for a title by its name, and call the callbackFunction with this Title + * If the title is not found, the callbackFunction is called with parameter null + * @param titleName + * @param callbackFunction + */ + LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) { + var that = this; + var normalize = this.getNormalizeFunction(); + var normalizedTitleName = normalize(titleName); + + titleIterators.FindPrefixOffset(this.titleFile, titleName, normalize).then(function(offset) { + var iterator = new titleIterators.SequentialTitleIterator(that, offset); + function check(title) { + if (title == null || normalize(title.name) !== normalizedTitleName) { + return null; + } else if (title.name === titleName) { + return title; + } else { + return iterator.advance().then(check); + } + } + return iterator.advance().then(check); + }).then(callbackFunction, errorHandler); + }; + + /** + * Get a random title, and call the callbackFunction with this Title + * @param callbackFunction + */ + LocalArchive.prototype.getRandomTitle = function(callbackFunction) { + var that = this; + var offset = Math.floor(Math.random() * this.titleFile.size); + jQuery.when().then(function() { + var iterator = new titleIterators.SequentialTitleIterator(that, offset); + // call advance twice because we are probably not at the beginning + // of a title + return iterator.advance().then(function() { + return iterator.advance(); + }); + }).then(callbackFunction, errorHandler); + }; + + /** + * Find titles that start with the given prefix, and call the callbackFunction with this list of Titles + * @param prefix + * @param maxSize Maximum number of titles to read + * @param callbackFunction + */ + LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) { + var that = this; + var titles = []; + var normalize = this.getNormalizeFunction(); + prefix = normalize(prefix); + + titleIterators.FindPrefixOffset(this.titleFile, prefix, normalize).then(function(offset) { + var iterator = new titleIterators.SequentialTitleIterator(that, offset); + function addNext() { + if (titles.length >= maxSize) { + return titles; + } + return iterator.advance().then(function(title) { + if (title == null) + return titles; + // check whether this title really starts with the prefix + var name = normalize(title.name); + if (name.length < prefix.length || name.substring(0, prefix.length) != prefix) + return titles; + titles.push(title); + return addNext(); + }); + } + return addNext(); + }).then(callbackFunction, errorHandler); + }; + + + /** + * Read an article from the title instance, and call the + * callbackFunction with the article HTML String + * + * @param title + * @param callbackFunction + */ + LocalArchive.prototype.readArticle = function(title, callbackFunction) { + var dataFile = null; + + var prefixedFileNumber = ""; + if (title.fileNr < 10) { + prefixedFileNumber = "0" + title.fileNr; + } else { + prefixedFileNumber = title.fileNr; + } + var expectedFileName = "wikipedia_" + prefixedFileNumber + ".dat"; + + // Find the good dump file + for (var i = 0; i < this.dataFiles.length; i++) { + var fileName = this.dataFiles[i].name; + // Check if the fileName ends with the expected file name (in case + // of DeviceStorage usage, the fileName is prefixed by the + // directory) + if (fileName.match(expectedFileName + "$") == expectedFileName) { + dataFile = this.dataFiles[i]; + } + } + if (!dataFile) { + // TODO can probably be replaced by some error handler at window level + alert("Oops : some files seem to be missing in your archive. Please report this problem to us by email (see About section), with the names of the archive and article, and the following info : " + + "File number " + title.fileNr + " not found"); + throw new Error("File number " + title.fileNr + " not found"); + } else { + var reader = new FileReader(); + // Read the article in the dataFile, starting with a chunk of CHUNK_SIZE + this.readArticleChunk(title, dataFile, reader, CHUNK_SIZE, callbackFunction); + } + + }; + + /** + * Read a chunk of the dataFile (of the given length) to try to read the + * given article. + * If the bzip2 algorithm works and articleLength of the article is reached, + * call the callbackFunction with the article HTML String. + * Else, recursively call this function with readLength + CHUNK_SIZE + * + * @param title + * @param dataFile + * @param reader + * @param readLength + * @param callbackFunction + */ + LocalArchive.prototype.readArticleChunk = function(title, dataFile, reader, + readLength, callbackFunction) { + var currentLocalArchiveInstance = this; + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Data file read cancelled'); + }; + reader.onload = function(e) { + try { + var compressedArticles = e.target.result; + webworkerBzip2.onerror = function(event){ + // TODO can probably be replaced by some error handler at window level + alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" + + event.message + " filename=" + event.filename + " line number=" + event.lineno ); + throw new Error("Error during bzip2 decompression : " + event.message + " (" + event.filename + ":" + event.lineno + ")"); + }; + webworkerBzip2.onmessage = function(event){ + switch (event.data.cmd){ + case "result": + var htmlArticles = event.data.msg; + // Start reading at offset, and keep length characters + var htmlArticle = htmlArticles.substring(title.blockOffset, + title.blockOffset + title.articleLength); + if (htmlArticle.length >= title.articleLength) { + // Keep only length characters + htmlArticle = htmlArticle.substring(0, title.articleLength); + // Decode UTF-8 encoding + htmlArticle = decodeURIComponent(escape(htmlArticle)); + callbackFunction(title, htmlArticle); + } else { + // TODO : throw exception if we reach the end of the file + currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, + callbackFunction); + } + break; + case "recurse": + currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, callbackFunction); + break; + case "debug": + console.log(event.data.msg); + break; + case "error": + // TODO can probably be replaced by some error handler at window level + alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" + + event.data.msg ); + throw new Error("Error during bzip2 decompression : " + event.data.msg); + break; + } + }; + webworkerBzip2.postMessage({cmd : 'uncompress', msg : + new Uint8Array(compressedArticles)}); + + } + catch (e) { + callbackFunction("Error : " + e); + } + }; + var blob = dataFile.slice(title.blockStart, title.blockStart + + readLength); + + // Read in the image file as a binary string. + reader.readAsArrayBuffer(blob); + }; + + /** + * Load the math image specified by the hex string and call the + * callbackFunction with a base64 encoding of its data. + * + * @param hexString + * @param callbackFunction + */ + LocalArchive.prototype.loadMathImage = function(hexString, callbackFunction) { + var entrySize = 16 + 4 + 4; + var lo = 0; + var hi = this.mathIndexFile.size / entrySize; + + var mathDataFile = this.mathDataFile; + + this.findMathDataPosition(hexString, lo, hi, function(pos, length) { + var reader = new FileReader(); + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Math image file read cancelled'); + }; + var blob = mathDataFile.slice(pos, pos + length); + reader.onload = function(e) { + var byteArray = new Uint8Array(e.target.result); + callbackFunction(util.uint8ArrayToBase64(byteArray)); + }; + reader.readAsArrayBuffer(blob); + }); + }; + + + /** + * Recursive algorithm to find the position of the Math image in the data file + * @param {type} hexString + * @param {type} lo + * @param {type} hi + * @param {type} callbackFunction + */ + LocalArchive.prototype.findMathDataPosition = function(hexString, lo, hi, callbackFunction) { + var entrySize = 16 + 4 + 4; + if (lo >= hi) { + /* TODO error - not found */ + return; + } + var reader = new FileReader(); + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Math image file read cancelled'); + }; + var mid = Math.floor((lo + hi) / 2); + var blob = this.mathIndexFile.slice(mid * entrySize, (mid + 1) * entrySize); + var currentLocalArchiveInstance = this; + reader.onload = function(e) { + var byteArray = new Uint8Array(e.target.result); + var hash = util.uint8ArrayToHex(byteArray.subarray(0, 16)); + if (hash == hexString) { + var pos = util.readIntegerFrom4Bytes(byteArray, 16); + var length = util.readIntegerFrom4Bytes(byteArray, 16 + 4); + callbackFunction(pos, length); + return; + } else if (hexString < hash) { + hi = mid; + } else { + lo = mid + 1; + } + + currentLocalArchiveInstance.findMathDataPosition(hexString, lo, hi, callbackFunction); + }; + // Read the file as a binary string + reader.readAsArrayBuffer(blob); + }; + + + /** + * Resolve the redirect of the given title instance, and call the callbackFunction with the redirected Title instance + * @param title + * @param callbackFunction + */ + LocalArchive.prototype.resolveRedirect = function(title, callbackFunction) { + var reader = new FileReader(); + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Title file read cancelled'); + }; + reader.onload = function(e) { + var binaryTitleFile = e.target.result; + var byteArray = new Uint8Array(binaryTitleFile); + + if (byteArray.length === 0) { + // TODO can probably be replaced by some error handler at window level + alert("Oops : there seems to be something wrong in your archive. Please report it to us by email or through Github (see About section), with the names of the archive and article and the following info : " + + "Unable to find redirected article for title " + title.name + " : offset " + title.blockStart + " not found in title file"); + throw new Error("Unable to find redirected article for title " + title.name + " : offset " + title.blockStart + " not found in title file"); + } + + var redirectedTitle = title; + redirectedTitle.fileNr = 1 * byteArray[2]; + redirectedTitle.blockStart = util.readIntegerFrom4Bytes(byteArray, 3); + redirectedTitle.blockOffset = util.readIntegerFrom4Bytes(byteArray, 7); + redirectedTitle.articleLength = util.readIntegerFrom4Bytes(byteArray, 11); + + callbackFunction(redirectedTitle); + }; + // Read only the 16 necessary bytes, starting at title.blockStart + var blob = this.titleFile.slice(title.blockStart, title.blockStart + 16); + // Read in the file as a binary string + reader.readAsArrayBuffer(blob); + }; + + /** + * Finds titles that are located inside the given rectangle + * This is the main function, that has to be called from the application + * + * @param {type} rect Rectangle where to look for titles + * @param {type} maxTitles Maximum number of titles to find + * @param callbackFunction Function to call with the list of titles found + */ + LocalArchive.prototype.getTitlesInCoords = function(rect, maxTitles, callbackFunction) { + var normalizedRectangle = rect.normalized(); + var i = 0; + LocalArchive.getTitlesInCoordsInt(this, i, 0, normalizedRectangle, GLOBE_RECTANGLE, maxTitles, new Array(), callbackFunction, LocalArchive.callbackGetTitlesInCoordsInt); + }; + + /** + * Callback function called by getTitlesInCoordsInt (or by itself), in order + * to loop through every coordinate file, and search titles nearby in each + * of them. + * When all the coordinate files are searched, or when enough titles are + * found, another function is called to convert the title positions found + * into Title instances (asynchronously) + * + * @param {type} localArchive + * @param {type} titlePositionsFound + * @param {type} i : index of the coordinate file + * @param {type} maxTitles + * @param {type} normalizedRectangle + * @param {type} callbackFunction + */ + LocalArchive.callbackGetTitlesInCoordsInt = function(localArchive, titlePositionsFound, i, maxTitles, normalizedRectangle, callbackFunction) { + i++; + if (titlePositionsFound.length < maxTitles && i < localArchive.coordinateFiles.length) { + LocalArchive.getTitlesInCoordsInt(localArchive, i, 0, normalizedRectangle, GLOBE_RECTANGLE, maxTitles, titlePositionsFound, callbackFunction, LocalArchive.callbackGetTitlesInCoordsInt); + } + else { + // Search is over : now let's convert the title positions into Title instances + if (titlePositionsFound && titlePositionsFound.length > 0) { + // TODO find out why there are duplicates, and why the maxTitles is not respected + // The statement below removes duplicates and limits its size + // (not correctly because based on indexes of the original array, instead of target array) + // This should be removed when the cause is found + var filteredTitlePositions = titlePositionsFound.filter(function (e, i, arr) { + return arr.lastIndexOf(e) === i && i<=maxTitles; + }); + LocalArchive.readTitlesFromTitleCoordsInTitleFile(localArchive, filteredTitlePositions, 0, new Array(), callbackFunction); + } + else { + callbackFunction(titlePositionsFound); + } + } + }; + + /** + * This function reads a list of title positions, and converts it into a list or Title instances. + * It handles index i, then recursively calls itself for index i+1 + * When all the list is processed, the callbackFunction is called with the Title list + * + * @param {type} localArchive + * @param {type} titlePositionsFound + * @param {type} i + * @param {type} titlesFound + * @param {type} callbackFunction + */ + LocalArchive.readTitlesFromTitleCoordsInTitleFile = function (localArchive, titlePositionsFound, i, titlesFound, callbackFunction) { + var titleOffset = titlePositionsFound[i]; + localArchive.getTitlesStartingAtOffset(titleOffset, 1, function(titleList) { + if (titleList && titleList.length === 1) { + titlesFound.push(titleList[0]); + i++; + if (i= 0 && titlePositionsFound.length >= maxTitles) { + callbackGetTitlesInCoordsInt(localArchive, titlePositionsFound, coordinateFileIndex, maxTitles, targetRect, callbackFunction); + return; + } + } + callbackGetTitlesInCoordsInt(localArchive, titlePositionsFound, coordinateFileIndex, maxTitles, targetRect, callbackFunction); + } + + }; + // Read 22 bytes in the coordinate files, at coordFilePos index, in order to read the selector and the coordinates + // 2 + 4 + 4 + 3 * 4 = 22 + // As there can be up to 65535 different coordinates, we have to read 22*65535 bytes = 1.44MB + // TODO : This should be improved by reading the file in 2 steps : + // - first read the selector + // - then read the coordinates (reading only the exact necessary bytes) + var blob = localArchive.coordinateFiles[coordinateFileIndex].slice(coordFilePos, coordFilePos + 22*65535); + + // Read in the file as a binary string + reader.readAsArrayBuffer(blob); + }; + + /** + *  Scans the DeviceStorage for archives + * + * @param storages List of DeviceStorage instances + * @param callbackFunction Function to call with the list of directories where archives are found + */ + LocalArchive.scanForArchives = function(storages, callbackFunction) { + var directories = []; + var promises = jQuery.map(storages, function(storage) { + return storage.scanForDirectoriesContainingFile('titles.idx') + .then(function(dirs) { + jQuery.merge(directories, dirs); + return true + }); + }); + jQuery.when.apply(null, promises).then(function() { + callbackFunction(directories); + }, function(error) { + alert("Error scanning your SD card : " + error + + ". If you're using the Firefox OS Simulator, please put the archives in " + + "a 'fake-sdcard' directory inside your Firefox profile " + + "(ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/" + + "profile/fake-sdcard/wikipedia_small_2010-08-14)"); + callbackFunction(null); + }); + }; + + /** + * Normalize the given String, if the current Archive is compatible. + * If it's not, return the given String, as is. + * @param string : string to normalized + * @returns normalized string, or same string if archive is not compatible + */ + LocalArchive.prototype.normalizeStringIfCompatibleArchive = function(string) { + if (this.normalizedTitles === true) { + return normalize_string.normalizeString(string); + } + else { + return string; + } + }; + + /** + * Returns a function that normalizes strings if the current archive is compatible. + * If it is not, returns the identity function. + */ + LocalArchive.prototype.getNormalizeFunction = function() { + if (this.normalizedTitles === true) { + return normalize_string.normalizeString; + } else { + return function(string) { return string; } + } + }; + + /** + * ErrorHandler for FileReader + * @param {type} evt + * @returns {undefined} + */ + function errorHandler(evt) { + switch (evt.target.error.code) { + case evt.target.error.NOT_FOUND_ERR: + alert('File Not Found!'); + break; + case evt.target.error.NOT_READABLE_ERR: + alert('File is not readable'); + break; + case evt.target.error.ABORT_ERR: + break; // noop + default: + alert('An error occurred reading this file.'); + }; + } + + + /** + * Functions and classes exposed by this module + */ + return { + LocalArchive: LocalArchive + }; +}); diff --git a/www/js/lib/titleIterators.js b/www/js/lib/titleIterators.js index d70432da..dfd2028b 100644 --- a/www/js/lib/titleIterators.js +++ b/www/js/lib/titleIterators.js @@ -1,121 +1,126 @@ -/** - * titleIterators.js : Various classes to iterate over titles, for example as a - * result of searching. - * - * Copyright 2014 Evopedia developers - * License GPL v3: - * - * This file is part of Evopedia. - * - * Evopedia is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Evopedia is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Evopedia (file LICENSE-GPLv3.txt). If not, see - */ -define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) { - // Maximum length of a title - // 300 bytes is arbitrary : we actually do not really know how long the titles will be - // But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough - var MAX_TITLE_LENGTH = 300; - - /** - * Iterates over all titles starting at the given offset. - * The asynchronous method advance has to be called before this.title is - * valid. - */ - function SequentialTitleIterator(archive, offset) { - this._titleFile = archive.titleFile; - this._archive = archive; - this._offset = offset; - this.title = null; - } - /** - * Advances to the next title (or the first), if possible. - * @returns jQuery promise containing the next title or null if there is no - * next title - */ - SequentialTitleIterator.prototype.advance = function() { - if (this._offset >= this._titleFile.size) { - this.title = null; - return jQuery.when(this.title); - } - var that = this; - return util.readFileSlice(this._titleFile, this._offset, - this._offset + MAX_TITLE_LENGTH).then(function(byteArray) { - var newLineIndex = 15; - while (newLineIndex < byteArray.length && byteArray[newLineIndex] != 10) { - newLineIndex++; - } - var encodedTitle = byteArray.subarray(0, newLineIndex); - that._title = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset); - that._offset += newLineIndex + 1; - return that._title; - }); - } - - /** - * Searches for the offset into the given title file where the first title - * with the given prefix (or lexicographically larger) is located. - * The given function normalize is applied to every title before comparison. - * @returns jQuery promise giving the offset - */ - function FindPrefixOffset(titleFile, prefix, normalize) { - prefix = normalize(prefix); - var lo = 0; - var hi = titleFile.size; - var iterate = function() { - if (lo >= hi) { - if (lo > 0) - lo += 2; // Let lo point to the start of an entry - return jQuery.when(lo); - } else { - var mid = Math.floor((lo + hi) / 2); - return util.readFileSlice(titleFile, mid, mid + MAX_TITLE_LENGTH).then(function(byteArray) { - // Look for the index of the next NewLine - var newLineIndex = 0; - while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { - newLineIndex++; - } - var startIndex = 0; - if (mid > 0) { - startIndex = newLineIndex + 16; - newLineIndex = startIndex; - // Look for the index of the next NewLine - while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { - newLineIndex++; - } - } - if (newLineIndex === startIndex) { - // End of file reached - hi = mid; - } else { - var normalizedTitle = normalize(utf8.parse(byteArray.subarray(startIndex, newLineIndex))); - if (normalizedTitle < prefix) { - lo = mid + newLineIndex - 1; - } else { - hi = mid; - } - } - return iterate(); - }); - } - } - return iterate(); - } - - /** - * Functions and classes exposed by this module - */ - return { - SequentialTitleIterator : SequentialTitleIterator, - FindPrefixOffset : FindPrefixOffset - }; -}); +/** + * titleIterators.js : Various classes to iterate over titles, for example as a + * result of searching. + * + * Copyright 2014 Evopedia developers + * License GPL v3: + * + * This file is part of Evopedia. + * + * Evopedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Evopedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Evopedia (file LICENSE-GPLv3.txt). If not, see + */ +define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) { + // Maximum length of a title + // 300 bytes is arbitrary : we actually do not really know how long the titles will be + // But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough + var MAX_TITLE_LENGTH = 300; + + /** + * Iterates over all titles starting at the given offset. + * The asynchronous method advance has to be called before this.title is + * valid. + * @param archive + * @param offset + */ + function SequentialTitleIterator(archive, offset) { + this._titleFile = archive.titleFile; + this._archive = archive; + this._offset = offset; + this.title = null; + }; + /** + * Advances to the next title (or the first), if possible. + * @returns jQuery promise containing the next title or null if there is no + * next title + */ + SequentialTitleIterator.prototype.advance = function() { + if (this._offset >= this._titleFile.size) { + this.title = null; + return jQuery.when(this.title); + } + var that = this; + return util.readFileSlice(this._titleFile, this._offset, + this._offset + MAX_TITLE_LENGTH).then(function(byteArray) { + var newLineIndex = 15; + while (newLineIndex < byteArray.length && byteArray[newLineIndex] != 10) { + newLineIndex++; + } + var encodedTitle = byteArray.subarray(0, newLineIndex); + that._title = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset); + that._offset += newLineIndex + 1; + return that._title; + }); + }; + + /** + * Searches for the offset into the given title file where the first title + * with the given prefix (or lexicographically larger) is located. + * The given function normalize is applied to every title before comparison. + * @param titleFile + * @param prefix + * @param normalize function to be applied to every title before comparison + * @returns jQuery promise giving the offset + */ + function FindPrefixOffset(titleFile, prefix, normalize) { + prefix = normalize(prefix); + var lo = 0; + var hi = titleFile.size; + var iterate = function() { + if (lo >= hi) { + if (lo > 0) + lo += 2; // Let lo point to the start of an entry + return jQuery.when(lo); + } else { + var mid = Math.floor((lo + hi) / 2); + return util.readFileSlice(titleFile, mid, mid + MAX_TITLE_LENGTH).then(function(byteArray) { + // Look for the index of the next NewLine + var newLineIndex = 0; + while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { + newLineIndex++; + } + var startIndex = 0; + if (mid > 0) { + startIndex = newLineIndex + 16; + newLineIndex = startIndex; + // Look for the index of the next NewLine + while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { + newLineIndex++; + } + } + if (newLineIndex === startIndex) { + // End of file reached + hi = mid; + } else { + var normalizedTitle = normalize(utf8.parse(byteArray.subarray(startIndex, newLineIndex))); + if (normalizedTitle < prefix) { + lo = mid + newLineIndex - 1; + } else { + hi = mid; + } + } + return iterate(); + }); + } + }; + return iterate(); + } + + /** + * Functions and classes exposed by this module + */ + return { + SequentialTitleIterator : SequentialTitleIterator, + FindPrefixOffset : FindPrefixOffset + }; +});