mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-24 04:54:51 -04:00
Added title iterator as preparation for infix search.
This commit is contained in:
parent
c592027d1d
commit
9430603001
@ -29,6 +29,7 @@ define(function(require) {
|
|||||||
var util = require('util');
|
var util = require('util');
|
||||||
var geometry = require('geometry');
|
var geometry = require('geometry');
|
||||||
var jQuery = require('jquery');
|
var jQuery = require('jquery');
|
||||||
|
var titleIterators = require('titleIterators');
|
||||||
|
|
||||||
// Declare the webworker that can uncompress with bzip2 algorithm
|
// Declare the webworker that can uncompress with bzip2 algorithm
|
||||||
var webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js");
|
var webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js");
|
||||||
@ -303,114 +304,30 @@ define(function(require) {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* This function is recursively called after each asynchronous read, so that
|
|
||||||
* to find the closest index in titleFile to the given prefix
|
|
||||||
* When found, call the callbackFunction with the index
|
|
||||||
*
|
|
||||||
* @param reader
|
|
||||||
* @param normalizedPrefix
|
|
||||||
* @param lo
|
|
||||||
* @param hi
|
|
||||||
* @param callbackFunction
|
|
||||||
*/
|
|
||||||
LocalArchive.prototype.recursivePrefixSearch = function(reader, normalizedPrefix, lo, hi, callbackFunction) {
|
|
||||||
if (lo < hi - 1) {
|
|
||||||
var mid = Math.floor((lo + hi) / 2);
|
|
||||||
var blob = this.titleFile.slice(mid, mid + MAX_TITLE_LENGTH);
|
|
||||||
var currentLocalArchiveInstance = this;
|
|
||||||
reader.onload = function(e) {
|
|
||||||
var binaryTitleFile = e.target.result;
|
|
||||||
var byteArray = new Uint8Array(binaryTitleFile);
|
|
||||||
// Look for the index of the next NewLine
|
|
||||||
var newLineIndex = 0;
|
|
||||||
while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) {
|
|
||||||
newLineIndex++;
|
|
||||||
}
|
|
||||||
var startIndex = 0;
|
|
||||||
if (mid > 0) {
|
|
||||||
startIndex = newLineIndex + 16;
|
|
||||||
newLineIndex = startIndex;
|
|
||||||
// Look for the index of the next NewLine
|
|
||||||
while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) {
|
|
||||||
newLineIndex++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (newLineIndex === startIndex) {
|
|
||||||
// End of file reached
|
|
||||||
hi = mid;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
var normalizedTitle = currentLocalArchiveInstance.normalizeStringIfCompatibleArchive(
|
|
||||||
utf8.parse(byteArray.subarray(startIndex, newLineIndex)));
|
|
||||||
if (normalizedTitle < normalizedPrefix) {
|
|
||||||
lo = mid + newLineIndex - 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
hi = mid;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
currentLocalArchiveInstance.recursivePrefixSearch(reader, normalizedPrefix, lo, hi, callbackFunction);
|
|
||||||
};
|
|
||||||
// Read the file as a binary string
|
|
||||||
reader.readAsArrayBuffer(blob);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (lo > 0) {
|
|
||||||
// Let lo point to the start of an entry
|
|
||||||
lo++;
|
|
||||||
lo++;
|
|
||||||
}
|
|
||||||
// We found the closest title at index lo
|
|
||||||
callbackFunction(lo);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
 * Read the titles in the title file starting at the given offset (maximum titleCount),
 * and call the callbackFunction with this list of Title instances.
 * Titles are read one at a time through a SequentialTitleIterator until either
 * titleCount titles were collected or the iterator reports no further title.
 * @param titleOffset offset into the title file - it has to point exactly
 *                    to the start of a title entry
 * @param titleCount maximum number of titles to retrieve
 * @param callbackFunction called with the array of titles that were read
 */
LocalArchive.prototype.getTitlesStartingAtOffset = function(titleOffset, titleCount, callbackFunction) {
    // Capture the archive: inside the promise callbacks below `this` is not
    // the LocalArchive instance any more, and the iterator needs the archive.
    var that = this;
    var titles = [];
    // Start from an already-resolved promise so that the whole read loop is
    // one promise chain ending in callbackFunction / errorHandler.
    jQuery.when().then(function() {
        var iterator = new titleIterators.SequentialTitleIterator(that, titleOffset);
        function addNext() {
            if (titles.length >= titleCount) {
                return titles;
            }
            return iterator.advance().then(function(title) {
                // A null title means the end of the title file was reached
                if (title == null) {
                    return titles;
                }
                titles.push(title);
                return addNext();
            });
        }
        return addNext();
    }).then(callbackFunction, errorHandler);
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -420,30 +337,23 @@ define(function(require) {
|
|||||||
* @param callbackFunction
|
* @param callbackFunction
|
||||||
*/
|
*/
|
||||||
LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) {
|
LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) {
|
||||||
var titleFileSize = this.titleFile.size;
|
var that = this;
|
||||||
var reader = new FileReader();
|
var normalize = this.getNormalizeFunction();
|
||||||
reader.onerror = errorHandler;
|
var normalizedTitleName = normalize(titleName);
|
||||||
reader.onabort = function(e) {
|
|
||||||
alert('Title file read cancelled');
|
titleIterators.FindPrefixOffset(this.titleFile, titleName, normalize).then(function(offset) {
|
||||||
};
|
var iterator = new titleIterators.SequentialTitleIterator(that, offset);
|
||||||
var currentLocalArchiveInstance = this;
|
function check(title) {
|
||||||
var normalizedTitleName = currentLocalArchiveInstance.normalizeStringIfCompatibleArchive(titleName);
|
if (title == null || normalize(title.name) !== normalizedTitleName) {
|
||||||
this.recursivePrefixSearch(reader, normalizedTitleName, 0, titleFileSize, function(titleOffset) {
|
return null;
|
||||||
currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, MAX_TITLES_WITH_SAME_NORMALIZED_NAME, function(titleList) {
|
} else if (title.name === titleName) {
|
||||||
if (titleList !== null && titleList.length>0) {
|
return title;
|
||||||
for (var i=0; i<titleList.length; i++) {
|
} else {
|
||||||
var title = titleList[i];
|
return iterator.advance().then(check);
|
||||||
if (title.name === titleName) {
|
|
||||||
// The title has been found
|
|
||||||
callbackFunction(title);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
return iterator.advance().then(check);
|
||||||
// The title has not been found
|
}).then(callbackFunction, errorHandler);
|
||||||
callbackFunction(null);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -461,32 +371,30 @@ define(function(require) {
|
|||||||
* @param callbackFunction
|
* @param callbackFunction
|
||||||
*/
|
*/
|
||||||
LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) {
|
LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) {
|
||||||
var titleFileSize = this.titleFile.size;
|
var that = this;
|
||||||
if (prefix) {
|
var titles = [];
|
||||||
prefix = this.normalizeStringIfCompatibleArchive(prefix);
|
var normalize = this.getNormalizeFunction();
|
||||||
}
|
prefix = normalize(prefix);
|
||||||
|
|
||||||
var reader = new FileReader();
|
titleIterators.FindPrefixOffset(this.titleFile, prefix, normalize).then(function(offset) {
|
||||||
reader.onerror = errorHandler;
|
var iterator = new titleIterators.SequentialTitleIterator(that, offset);
|
||||||
reader.onabort = function(e) {
|
function addNext() {
|
||||||
alert('Title file read cancelled');
|
if (titles.length >= maxSize) {
|
||||||
};
|
return titles;
|
||||||
var currentLocalArchiveInstance = this;
|
|
||||||
var normalizedPrefix = this.normalizeStringIfCompatibleArchive(prefix);
|
|
||||||
this.recursivePrefixSearch(reader, normalizedPrefix, 0, titleFileSize, function(titleOffset) {
|
|
||||||
currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, maxSize, function(titleList) {
|
|
||||||
// Keep only the titles with names starting with the prefix
|
|
||||||
var i = 0;
|
|
||||||
for (i = 0; i < titleList.length; i++) {
|
|
||||||
var titleName = titleList[i].name;
|
|
||||||
var normalizedTitleName = currentLocalArchiveInstance.normalizeStringIfCompatibleArchive(titleName);
|
|
||||||
if (normalizedTitleName.length < normalizedPrefix.length || normalizedTitleName.substring(0, normalizedPrefix.length) !== normalizedPrefix) {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
return iterator.advance().then(function(title) {
|
||||||
|
if (title == null)
|
||||||
|
return titles;
|
||||||
|
// check whether this title really starts with the prefix
|
||||||
|
var name = normalize(title.name);
|
||||||
|
if (name.length < prefix.length || name.substring(0, prefix.length) != prefix)
|
||||||
|
return titles;
|
||||||
|
titles.push(title);
|
||||||
|
return addNext();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
callbackFunction(titleList.slice(0, i));
|
return addNext();
|
||||||
});
|
}).then(callbackFunction, errorHandler);
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -950,6 +858,18 @@ define(function(require) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Selects the string normalization to use for this archive.
 * When the archive is flagged with normalizedTitles === true, the shared
 * normalize_string.normalizeString function is returned; otherwise a function
 * that hands its argument back unchanged is returned.
 * @returns function taking a string and returning the (possibly normalized) string
 */
LocalArchive.prototype.getNormalizeFunction = function() {
    var identity = function(string) { return string; };
    return this.normalizedTitles === true ? normalize_string.normalizeString : identity;
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ErrorHandler for FileReader
|
* ErrorHandler for FileReader
|
||||||
* @param {type} evt
|
* @param {type} evt
|
||||||
|
121
www/js/lib/titleIterators.js
Normal file
121
www/js/lib/titleIterators.js
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
/**
|
||||||
|
* titleIterators.js : Various classes to iterate over titles, for example as a
|
||||||
|
* result of searching.
|
||||||
|
*
|
||||||
|
* Copyright 2014 Evopedia developers
|
||||||
|
* License GPL v3:
|
||||||
|
*
|
||||||
|
* This file is part of Evopedia.
|
||||||
|
*
|
||||||
|
* Evopedia is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Evopedia is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
||||||
|
*/
|
||||||
|
define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) {
|
||||||
|
// Maximum length of a title
|
||||||
|
// 300 bytes is arbitrary : we actually do not really know how long the titles will be
|
||||||
|
// But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough
|
||||||
|
var MAX_TITLE_LENGTH = 300;
|
||||||
|
|
||||||
|
/**
 * Iterates over all titles starting at the given offset.
 * The asynchronous method advance has to be called before this.title is
 * valid.
 * @param archive archive whose titleFile is read; it is also handed to
 *                Title.parseTitle for every title that is parsed
 * @param offset byte offset into the title file of the first entry to read
 */
function SequentialTitleIterator(archive, offset) {
    this._titleFile = archive.titleFile;
    this._archive = archive;
    this._offset = offset;
    this.title = null;
}
/**
 * Advances to the next title (or the first), if possible.
 * On success this.title holds the title that was just read; at the end of
 * the title file this.title is reset to null.
 * @returns jQuery promise containing the next title or null if there is no
 *          next title
 */
SequentialTitleIterator.prototype.advance = function() {
    if (this._offset >= this._titleFile.size) {
        this.title = null;
        return jQuery.when(this.title);
    }
    var that = this;
    return util.readFileSlice(this._titleFile, this._offset,
            this._offset + MAX_TITLE_LENGTH).then(function(byteArray) {
        // The newline search starts at byte 15 - presumably the entry begins
        // with a fixed-size binary header that must not be scanned (TODO confirm).
        var newLineIndex = 15;
        while (newLineIndex < byteArray.length && byteArray[newLineIndex] != 10) {
            newLineIndex++;
        }
        var encodedTitle = byteArray.subarray(0, newLineIndex);
        // Store the result in the documented public property (it used to be
        // written to an undeclared _title, leaving this.title stuck at null).
        that.title = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset);
        // Skip the entry and its terminating newline
        that._offset += newLineIndex + 1;
        return that.title;
    });
};
|
||||||
|
|
||||||
|
/**
 * Searches for the offset into the given title file where the first title
 * with the given prefix (or lexicographically larger) is located.
 * The given function normalize is applied to the prefix and to every title
 * before comparison.
 * The search is a bisection over byte offsets: each step reads a slice at the
 * middle offset, locates a title inside it and compares it to the prefix.
 * @param titleFile file (Blob-like: size, slice) containing the title entries
 * @param prefix prefix to look for
 * @param normalize function applied to strings before they are compared
 * @returns jQuery promise giving the offset
 */
function FindPrefixOffset(titleFile, prefix, normalize) {
    // Number of bytes in front of the title text of an entry. It matches the
    // "newline + 16" skip used for entries in the middle of the file.
    var TITLE_HEADER_LENGTH = 15;
    var NEW_LINE = 10;
    prefix = normalize(prefix);
    var lo = 0;
    var hi = titleFile.size;

    // Index of the first NewLine at or after `from`; when there is none the
    // result is the array length, or `from` itself if that is already past the end.
    function indexOfNewLine(byteArray, from) {
        var index = from;
        while (index < byteArray.length && byteArray[index] !== NEW_LINE) {
            index++;
        }
        return index;
    }

    var iterate = function() {
        if (lo >= hi) {
            if (lo > 0) {
                lo += 2; // Let lo point to the start of an entry
            }
            return jQuery.when(lo);
        }
        var mid = Math.floor((lo + hi) / 2);
        return util.readFileSlice(titleFile, mid, mid + MAX_TITLE_LENGTH).then(function(byteArray) {
            var startIndex;
            if (mid > 0) {
                // mid usually points into the middle of an entry: skip to the
                // entry after the next NewLine, then over its header.
                startIndex = indexOfNewLine(byteArray, 0) + 1 + TITLE_HEADER_LENGTH;
            } else {
                // mid is the start of the very first entry: only its header has
                // to be skipped. (Decoding from byte 0 would mix the header
                // bytes into the title and could make the search skip it.)
                startIndex = TITLE_HEADER_LENGTH;
            }
            var newLineIndex = indexOfNewLine(byteArray, startIndex);
            if (newLineIndex === startIndex) {
                // End of file reached
                hi = mid;
            } else {
                var normalizedTitle = normalize(utf8.parse(byteArray.subarray(startIndex, newLineIndex)));
                if (normalizedTitle < prefix) {
                    // Two bytes before the start of the following entry
                    lo = mid + newLineIndex - 1;
                } else {
                    hi = mid;
                }
            }
            return iterate();
        });
    };
    return iterate();
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Functions and classes exposed by this module
|
||||||
|
*/
|
||||||
|
return {
|
||||||
|
SequentialTitleIterator : SequentialTitleIterator,
|
||||||
|
FindPrefixOffset : FindPrefixOffset
|
||||||
|
};
|
||||||
|
});
|
@ -20,6 +20,7 @@
|
|||||||
* along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
* along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
||||||
*/
|
*/
|
||||||
define(function(require) {
|
define(function(require) {
|
||||||
|
var jQuery = require('jquery');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility function : return true if the given string ends with the suffix
|
* Utility function : return true if the given string ends with the suffix
|
||||||
@ -113,6 +114,25 @@ define(function(require) {
|
|||||||
return (r > 0 ? enc.slice(0, r - 3) : enc) + '==='.slice(r || 3);
|
return (r > 0 ? enc.slice(0, r - 3) : enc) + '==='.slice(r || 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads a Uint8Array from the given file starting at byte offset begin and
 * not including byte offset end.
 * @param file object providing slice(begin, end) whose result can be read by a FileReader
 * @param begin byte offset of the first byte to read
 * @param end byte offset just behind the last byte to read
 * @returns jQuery promise resolved with the Uint8Array that was read. It is
 *          rejected with the FileReader event on error or abort, or with the
 *          thrown error if the read could not even be started.
 */
function readFileSlice(file, begin, end) {
    var deferred = jQuery.Deferred();
    try {
        var reader = new FileReader();
        reader.onload = function(e) {
            deferred.resolve(new Uint8Array(e.target.result));
        };
        reader.onerror = reader.onabort = function(e) {
            deferred.reject(e);
        };
        reader.readAsArrayBuffer(file.slice(begin, end));
    } catch (error) {
        // Report synchronous failures through the promise as well, so that
        // callers only need their rejection handler.
        deferred.reject(error);
    }
    return deferred.promise();
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Functions and classes exposed by this module
|
* Functions and classes exposed by this module
|
||||||
*/
|
*/
|
||||||
@ -122,6 +142,7 @@ define(function(require) {
|
|||||||
readIntegerFrom2Bytes : readIntegerFrom2Bytes,
|
readIntegerFrom2Bytes : readIntegerFrom2Bytes,
|
||||||
readFloatFrom4Bytes : readFloatFrom4Bytes,
|
readFloatFrom4Bytes : readFloatFrom4Bytes,
|
||||||
uint8ArrayToHex : uint8ArrayToHex,
|
uint8ArrayToHex : uint8ArrayToHex,
|
||||||
uint8ArrayToBase64 : uint8ArrayToBase64
|
uint8ArrayToBase64 : uint8ArrayToBase64,
|
||||||
|
readFileSlice : readFileSlice
|
||||||
};
|
};
|
||||||
});
|
});
|
Loading…
x
Reference in New Issue
Block a user