mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-22 03:52:21 -04:00
Added title iterator as preparation for infix search.
This commit is contained in:
parent
c592027d1d
commit
9430603001
@ -29,6 +29,7 @@ define(function(require) {
|
||||
var util = require('util');
|
||||
var geometry = require('geometry');
|
||||
var jQuery = require('jquery');
|
||||
var titleIterators = require('titleIterators');
|
||||
|
||||
// Declare the webworker that can uncompress with bzip2 algorithm
|
||||
var webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js");
|
||||
@ -303,114 +304,30 @@ define(function(require) {
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* This function is recursively called after each asynchronous read, so that
|
||||
* to find the closest index in titleFile to the given prefix
|
||||
* When found, call the callbackFunction with the index
|
||||
*
|
||||
* @param reader
|
||||
* @param normalizedPrefix
|
||||
* @param lo
|
||||
* @param hi
|
||||
* @param callbackFunction
|
||||
*/
|
||||
LocalArchive.prototype.recursivePrefixSearch = function(reader, normalizedPrefix, lo, hi, callbackFunction) {
|
||||
if (lo < hi - 1) {
|
||||
var mid = Math.floor((lo + hi) / 2);
|
||||
var blob = this.titleFile.slice(mid, mid + MAX_TITLE_LENGTH);
|
||||
var currentLocalArchiveInstance = this;
|
||||
reader.onload = function(e) {
|
||||
var binaryTitleFile = e.target.result;
|
||||
var byteArray = new Uint8Array(binaryTitleFile);
|
||||
// Look for the index of the next NewLine
|
||||
var newLineIndex = 0;
|
||||
while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) {
|
||||
newLineIndex++;
|
||||
}
|
||||
var startIndex = 0;
|
||||
if (mid > 0) {
|
||||
startIndex = newLineIndex + 16;
|
||||
newLineIndex = startIndex;
|
||||
// Look for the index of the next NewLine
|
||||
while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) {
|
||||
newLineIndex++;
|
||||
}
|
||||
}
|
||||
if (newLineIndex === startIndex) {
|
||||
// End of file reached
|
||||
hi = mid;
|
||||
}
|
||||
else {
|
||||
var normalizedTitle = currentLocalArchiveInstance.normalizeStringIfCompatibleArchive(
|
||||
utf8.parse(byteArray.subarray(startIndex, newLineIndex)));
|
||||
if (normalizedTitle < normalizedPrefix) {
|
||||
lo = mid + newLineIndex - 1;
|
||||
}
|
||||
else {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
currentLocalArchiveInstance.recursivePrefixSearch(reader, normalizedPrefix, lo, hi, callbackFunction);
|
||||
};
|
||||
// Read the file as a binary string
|
||||
reader.readAsArrayBuffer(blob);
|
||||
}
|
||||
else {
|
||||
if (lo > 0) {
|
||||
// Let lo point to the start of an entry
|
||||
lo++;
|
||||
lo++;
|
||||
}
|
||||
// We found the closest title at index lo
|
||||
callbackFunction(lo);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Read the titles in the title file starting at the given offset (maximum titleCount), and call the callbackFunction with this list of Title instances
|
||||
* @param titleOffset
|
||||
* @param titleOffset offset into the title file - it has to point exactly
|
||||
* to the start of a title entry
|
||||
* @param titleCount maximum number of titles to retrieve
|
||||
* @param callbackFunction
|
||||
*/
|
||||
LocalArchive.prototype.getTitlesStartingAtOffset = function(titleOffset, titleCount, callbackFunction) {
|
||||
var reader = new FileReader();
|
||||
reader.onerror = errorHandler;
|
||||
reader.onabort = function(e) {
|
||||
alert('Title file read cancelled');
|
||||
};
|
||||
|
||||
var currentLocalArchiveInstance = this;
|
||||
reader.onload = function(e) {
|
||||
var binaryTitleFile = e.target.result;
|
||||
var byteArray = new Uint8Array(binaryTitleFile);
|
||||
var i = 0;
|
||||
var newLineIndex = 0;
|
||||
var titleNumber = 0;
|
||||
var titleList = new Array();
|
||||
while (i < byteArray.length && titleNumber < titleCount) {
|
||||
// Look for the index of the next NewLine
|
||||
newLineIndex += 15;
|
||||
while (newLineIndex < byteArray.length && byteArray[newLineIndex] != 10) {
|
||||
newLineIndex++;
|
||||
var titles = [];
|
||||
jQuery.when().then(function() {
|
||||
var iterator = new titleIterators.SequentialTitleIterator(this, titleOffset);
|
||||
function addNext() {
|
||||
if (titles.length >= titleCount) {
|
||||
return titles;
|
||||
}
|
||||
|
||||
// Copy the encodedTitle in a new Array
|
||||
var encodedTitle = new Uint8Array(newLineIndex - i);
|
||||
for (var j = 0; j < newLineIndex - i; j++) {
|
||||
encodedTitle[j] = byteArray[i + j];
|
||||
}
|
||||
|
||||
var title = evopediaTitle.Title.parseTitle(encodedTitle, currentLocalArchiveInstance, i);
|
||||
|
||||
titleList[titleNumber] = title;
|
||||
titleNumber++;
|
||||
i = newLineIndex + 1;
|
||||
return iterator.advance().then(function(title) {
|
||||
if (title == null)
|
||||
return titles;
|
||||
titles.push(title);
|
||||
return addNext();
|
||||
});
|
||||
}
|
||||
callbackFunction(titleList);
|
||||
};
|
||||
var blob = this.titleFile.slice(titleOffset, titleOffset + titleCount * MAX_TITLE_LENGTH);
|
||||
// Read in the file as a binary string
|
||||
reader.readAsArrayBuffer(blob);
|
||||
return addNext();
|
||||
}).then(callbackFunction, errorHandler);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -420,30 +337,23 @@ define(function(require) {
|
||||
* @param callbackFunction
|
||||
*/
|
||||
LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) {
|
||||
var titleFileSize = this.titleFile.size;
|
||||
var reader = new FileReader();
|
||||
reader.onerror = errorHandler;
|
||||
reader.onabort = function(e) {
|
||||
alert('Title file read cancelled');
|
||||
};
|
||||
var currentLocalArchiveInstance = this;
|
||||
var normalizedTitleName = currentLocalArchiveInstance.normalizeStringIfCompatibleArchive(titleName);
|
||||
this.recursivePrefixSearch(reader, normalizedTitleName, 0, titleFileSize, function(titleOffset) {
|
||||
currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, MAX_TITLES_WITH_SAME_NORMALIZED_NAME, function(titleList) {
|
||||
if (titleList !== null && titleList.length>0) {
|
||||
for (var i=0; i<titleList.length; i++) {
|
||||
var title = titleList[i];
|
||||
if (title.name === titleName) {
|
||||
// The title has been found
|
||||
callbackFunction(title);
|
||||
return;
|
||||
}
|
||||
}
|
||||
var that = this;
|
||||
var normalize = this.getNormalizeFunction();
|
||||
var normalizedTitleName = normalize(titleName);
|
||||
|
||||
titleIterators.FindPrefixOffset(this.titleFile, titleName, normalize).then(function(offset) {
|
||||
var iterator = new titleIterators.SequentialTitleIterator(that, offset);
|
||||
function check(title) {
|
||||
if (title == null || normalize(title.name) !== normalizedTitleName) {
|
||||
return null;
|
||||
} else if (title.name === titleName) {
|
||||
return title;
|
||||
} else {
|
||||
return iterator.advance().then(check);
|
||||
}
|
||||
// The title has not been found
|
||||
callbackFunction(null);
|
||||
});
|
||||
});
|
||||
}
|
||||
return iterator.advance().then(check);
|
||||
}).then(callbackFunction, errorHandler);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -461,32 +371,30 @@ define(function(require) {
|
||||
* @param callbackFunction
|
||||
*/
|
||||
LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) {
|
||||
var titleFileSize = this.titleFile.size;
|
||||
if (prefix) {
|
||||
prefix = this.normalizeStringIfCompatibleArchive(prefix);
|
||||
}
|
||||
var that = this;
|
||||
var titles = [];
|
||||
var normalize = this.getNormalizeFunction();
|
||||
prefix = normalize(prefix);
|
||||
|
||||
var reader = new FileReader();
|
||||
reader.onerror = errorHandler;
|
||||
reader.onabort = function(e) {
|
||||
alert('Title file read cancelled');
|
||||
};
|
||||
var currentLocalArchiveInstance = this;
|
||||
var normalizedPrefix = this.normalizeStringIfCompatibleArchive(prefix);
|
||||
this.recursivePrefixSearch(reader, normalizedPrefix, 0, titleFileSize, function(titleOffset) {
|
||||
currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, maxSize, function(titleList) {
|
||||
// Keep only the titles with names starting with the prefix
|
||||
var i = 0;
|
||||
for (i = 0; i < titleList.length; i++) {
|
||||
var titleName = titleList[i].name;
|
||||
var normalizedTitleName = currentLocalArchiveInstance.normalizeStringIfCompatibleArchive(titleName);
|
||||
if (normalizedTitleName.length < normalizedPrefix.length || normalizedTitleName.substring(0, normalizedPrefix.length) !== normalizedPrefix) {
|
||||
break;
|
||||
}
|
||||
titleIterators.FindPrefixOffset(this.titleFile, prefix, normalize).then(function(offset) {
|
||||
var iterator = new titleIterators.SequentialTitleIterator(that, offset);
|
||||
function addNext() {
|
||||
if (titles.length >= maxSize) {
|
||||
return titles;
|
||||
}
|
||||
callbackFunction(titleList.slice(0, i));
|
||||
});
|
||||
});
|
||||
return iterator.advance().then(function(title) {
|
||||
if (title == null)
|
||||
return titles;
|
||||
// check whether this title really starts with the prefix
|
||||
var name = normalize(title.name);
|
||||
if (name.length < prefix.length || name.substring(0, prefix.length) != prefix)
|
||||
return titles;
|
||||
titles.push(title);
|
||||
return addNext();
|
||||
});
|
||||
}
|
||||
return addNext();
|
||||
}).then(callbackFunction, errorHandler);
|
||||
};
|
||||
|
||||
|
||||
@ -950,6 +858,18 @@ define(function(require) {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Selects the string transformation to apply to titles of this archive
 * before they are compared.
 * @returns normalize_string.normalizeString when this.normalizedTitles is
 * strictly true, otherwise a function returning its argument unchanged
 */
LocalArchive.prototype.getNormalizeFunction = function() {
    var identity = function(string) { return string; };
    return this.normalizedTitles === true ? normalize_string.normalizeString : identity;
};
|
||||
|
||||
/**
|
||||
* ErrorHandler for FileReader
|
||||
* @param {type} evt
|
||||
|
@ -147,4 +147,4 @@ define(function(require) {
|
||||
return {
|
||||
Title: Title
|
||||
};
|
||||
});
|
||||
});
|
||||
|
121
www/js/lib/titleIterators.js
Normal file
121
www/js/lib/titleIterators.js
Normal file
@ -0,0 +1,121 @@
|
||||
/**
|
||||
* titleIterators.js : Various classes to iterate over titles, for example as a
|
||||
* result of searching.
|
||||
*
|
||||
* Copyright 2014 Evopedia developers
|
||||
* License GPL v3:
|
||||
*
|
||||
* This file is part of Evopedia.
|
||||
*
|
||||
* Evopedia is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Evopedia is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) {
|
||||
// Maximum length of a title
|
||||
// 300 bytes is arbitrary : we actually do not really know how long the titles will be
|
||||
// But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough
|
||||
var MAX_TITLE_LENGTH = 300;
|
||||
|
||||
/**
 * Iterates over all titles of an archive's title file, starting at the
 * given offset.
 * The asynchronous method advance has to be called before this.title is
 * valid.
 * @param archive archive whose titleFile is read; it is also passed to
 * Title.parseTitle for every entry
 * @param offset byte offset into the title file; presumably it has to point
 * exactly to the start of an entry (confirm against the callers)
 */
function SequentialTitleIterator(archive, offset) {
    this._titleFile = archive.titleFile;
    this._archive = archive;
    this._offset = offset;
    this.title = null;
}

/**
 * Advances to the next title (or the first), if possible, and stores it in
 * this.title (null once the end of the title file has been reached).
 * @returns jQuery promise containing the next title or null if there is no
 * next title
 */
SequentialTitleIterator.prototype.advance = function() {
    if (this._offset >= this._titleFile.size) {
        this.title = null;
        return jQuery.when(this.title);
    }
    var that = this;
    return util.readFileSlice(this._titleFile, this._offset,
                              this._offset + MAX_TITLE_LENGTH).then(function(byteArray) {
        // The search for the terminating newline starts at index 15, so a
        // byte of value 10 within the first 15 bytes of an entry is never
        // mistaken for the end of the entry.
        var newLineIndex = 15;
        while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) {
            newLineIndex++;
        }
        var encodedTitle = byteArray.subarray(0, newLineIndex);
        // Publish the parsed entry on the documented "title" property
        that.title = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset);
        // Skip the entry and its newline so the next call reads the following entry
        that._offset += newLineIndex + 1;
        return that.title;
    });
};
|
||||
|
||||
/**
 * Searches for the offset into the given title file where the first title
 * with the given prefix (or lexicographically larger) is located.
 * The given function normalize is applied to the prefix and to every title
 * before comparison.
 *
 * The search is a bisection over byte positions: each step reads a slice at
 * the middle position, skips to the entry that starts after the next
 * newline and compares its title with the prefix.
 *
 * @param titleFile file-like object; its size is read and it is handed to
 * util.readFileSlice
 * @param prefix prefix to look for (not yet normalized)
 * @param normalize function applied to the prefix and to every title
 * @returns jQuery promise giving the offset
 */
function FindPrefixOffset(titleFile, prefix, normalize) {
    // Number of bytes at the start of every entry that precede the title name
    var ENTRY_HEADER_LENGTH = 15;
    var NEW_LINE = 10;

    prefix = normalize(prefix);
    var lo = 0;
    var hi = titleFile.size;

    // Index of the first newline at or after "from" (or a value >= the
    // array length if there is none)
    function indexOfNewLine(byteArray, from) {
        var index = from;
        while (index < byteArray.length && byteArray[index] !== NEW_LINE) {
            index++;
        }
        return index;
    }

    var iterate = function() {
        if (lo >= hi) {
            if (lo > 0) {
                // lo is the last byte of a title: skip it and the newline
                // so that lo points to the start of an entry
                lo += 2;
            }
            return jQuery.when(lo);
        }
        var mid = Math.floor((lo + hi) / 2);
        return util.readFileSlice(titleFile, mid, mid + MAX_TITLE_LENGTH).then(function(byteArray) {
            // Position 0 is the start of the first entry, so only the
            // leading bytes of that entry have to be skipped. Treating them
            // as part of the title would compare garbage with the prefix.
            var startIndex = ENTRY_HEADER_LENGTH;
            if (mid > 0) {
                // mid is somewhere inside an entry: move to the title of
                // the entry that starts after the next newline
                startIndex = indexOfNewLine(byteArray, 0) + 1 + ENTRY_HEADER_LENGTH;
            }
            var newLineIndex = indexOfNewLine(byteArray, startIndex);
            if (newLineIndex === startIndex) {
                // End of file reached
                hi = mid;
            } else {
                var normalizedTitle = normalize(utf8.parse(byteArray.subarray(startIndex, newLineIndex)));
                if (normalizedTitle < prefix) {
                    // Continue after this title (lo is its last byte)
                    lo = mid + newLineIndex - 1;
                } else {
                    hi = mid;
                }
            }
            return iterate();
        });
    };
    return iterate();
}
|
||||
|
||||
/**
|
||||
* Functions and classes exposed by this module
|
||||
*/
|
||||
return {
|
||||
SequentialTitleIterator : SequentialTitleIterator,
|
||||
FindPrefixOffset : FindPrefixOffset
|
||||
};
|
||||
});
|
@ -20,6 +20,7 @@
|
||||
* along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
|
||||
*/
|
||||
define(function(require) {
|
||||
var jQuery = require('jquery');
|
||||
|
||||
/**
|
||||
* Utility function : return true if the given string ends with the suffix
|
||||
@ -112,6 +113,25 @@ define(function(require) {
|
||||
|
||||
return (r > 0 ? enc.slice(0, r - 3) : enc) + '==='.slice(r || 3);
|
||||
}
|
||||
|
||||
/**
 * Asynchronously reads the bytes of the given file from byte offset begin
 * (inclusive) up to byte offset end (exclusive).
 * @returns jQuery promise resolved with a Uint8Array of the bytes read, or
 * rejected with the FileReader event if the read fails or is aborted
 */
function readFileSlice(file, begin, end) {
    var pending = jQuery.Deferred();
    var fileReader = new FileReader();
    function fail(event) {
        pending.reject(event);
    }
    fileReader.onload = function(event) {
        pending.resolve(new Uint8Array(event.target.result));
    };
    fileReader.onerror = fail;
    fileReader.onabort = fail;
    var slice = file.slice(begin, end);
    fileReader.readAsArrayBuffer(slice);
    return pending.promise();
}
|
||||
|
||||
|
||||
/**
|
||||
* Functions and classes exposed by this module
|
||||
@ -122,6 +142,7 @@ define(function(require) {
|
||||
readIntegerFrom2Bytes : readIntegerFrom2Bytes,
|
||||
readFloatFrom4Bytes : readFloatFrom4Bytes,
|
||||
uint8ArrayToHex : uint8ArrayToHex,
|
||||
uint8ArrayToBase64 : uint8ArrayToBase64
|
||||
uint8ArrayToBase64 : uint8ArrayToBase64,
|
||||
readFileSlice : readFileSlice
|
||||
};
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user