Adopt search efficiency gains and refactor from Kiwix JS

Former-commit-id: b72adf6609d0166a771e86e5d01a5723ecd0187e [formerly c6729c6f6140e1c5519d31c21eadb248df96f8c6] [formerly 2f0ca94f9fdb8de39b0ee7b1946d8359d193a028] [formerly f1ee7f5c74cc48c706e083c04643b5e94ab8f590 [formerly 468aad126122372266a7d533bf1243ef02251edd [formerly 1129bbc2afaf7b997cb55b9f38de81f03f03d553]]]
Former-commit-id: 52b4132fe79d74f9e5059075a8c63f386896adec [formerly 414381fcd1a3bb9cfffb5bbd579176cdf456c846 [formerly b01f2b45e19f1e52bb8f37edafd9d28dc8329c60]]
Former-commit-id: 2dfb4b4895dffc667650fdfc3660dbb9acb870dd [formerly b05cc40ee28febbbd18e7790b18a4d5583eae89c]
Former-commit-id: 6cced6624083139724f9c75b461a8da014c73f9c
This commit is contained in:
Jaifroid 2021-07-12 21:29:13 +01:00
parent 0a7c56cfac
commit a8afc79971
2 changed files with 17 additions and 19 deletions

View File

@ -2810,7 +2810,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// Cancel the old search (zimArchive search object will receive this change)
appstate.search.status = 'cancelled';
// Initiate a new search object and point appstate.search to it (the zimArchive search object will continue to point to the old object)
appstate.search = {'prefix': prefix, 'status': 'init', 'type': ''};
appstate.search = {'prefix': prefix, 'status': 'init', 'type': '', 'size': params.maxResults};
$('#activeContent').hide();
if (!prefix || /^\s/.test(prefix)) {
var sel = prefix ? prefix.replace(/^\s(.*)/, '$1') : '';
@ -2821,7 +2821,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
showZIMIndex(null, sel);
} else {
appstate.selectedArchive.findDirEntriesWithPrefix(appstate.search, params.maxResults, populateListOfArticles);
appstate.selectedArchive.findDirEntriesWithPrefix(appstate.search, populateListOfArticles);
}
} else {
$('#searchingArticles').hide();
@ -2844,9 +2844,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
} else {
prefix = start > 0 ? '' : prefix;
}
var search = {'prefix': prefix, 'state': ''}; // Dummy search object because expected by callee
var search = {'prefix': prefix, 'state': '', 'size': params.maxResults, 'window': params.maxResults};
if (appstate.selectedArchive !== null && appstate.selectedArchive.isReady()) {
appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(prefix, params.maxResults, search, function (dirEntryArray, nextStart) {
appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(prefix, search, function (dirEntryArray, nextStart) {
var docBody = document.getElementById('largeModal');
var newHtml = "";
for (var i = 0; i < dirEntryArray.length; i++) {

View File

@ -171,11 +171,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
* See https://phabricator.wikimedia.org/T108536
*
* @param {Object} search The current appstate.search object
* @param {Integer} resultSize The number of dirEntries to find
* @param {callbackDirEntryList} callback The function to call with the result
* @param {Boolean} noInterim A flag to prevent callback until all results are ready (used in testing)
*/
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, resultSize, callback, noInterim) {
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noInterim) {
var that = this;
// Establish array of initial values that must be searched first. All of these patterns are generated by the full
// search type, and some by basic, but we need the most common patterns to be searched first, as it returns search
@ -219,7 +218,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
function searchNextVariant() {
// If user has initiated a new search, cancel this one
if (search.status === 'cancelled') return callback([], search.status);
if (prefixVariants.length === 0 || dirEntries.length >= resultSize) {
if (prefixVariants.length === 0 || dirEntries.length >= search.size) {
search.status = 'complete';
return callback(dirEntries, search.status);
}
@ -229,10 +228,9 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
search.found = dirEntries.length;
var prefix = prefixVariants[0];
prefixVariants = prefixVariants.slice(1);
search.resultSize = resultSize - dirEntries.length;
// Search window sets an upper limit on how many matching dirEntries will be scanned in a full index search
var searchWindow = search.rgxPrefix ? 10000 * resultSize : resultSize;
that.findDirEntriesWithPrefixCaseSensitive(prefix, searchWindow, search,
search.window = search.rgxPrefix ? 10000 * search.size : search.size;
that.findDirEntriesWithPrefixCaseSensitive(prefix, search,
function (newDirEntries, idx, interim) {
if (search.status === 'cancelled') return callback([], search.status);
if (interim) {// Only push interim results (else results will be pushed again at end of variant loop)
@ -274,12 +272,11 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
* Look for dirEntries with title starting with the given prefix (case-sensitive)
*
* @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared
* @param {Integer} searchWindow The maximum number of dirEntries to scan in a single variant pass
* @param {Object} search The appstate.search object (for comparison, so that we can cancel long binary searches)
* @param {Function} callback The function to call with the array of dirEntries with titles that begin with prefix
* @param {Integer} startIndex The index number with which to commence the search, or null
*/
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, searchWindow, search, callback, startIndex) {
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, search, callback, startIndex) {
// Save the value of startIndex because value of null has a special meaning in combination with prefix:
// produces a list of matches starting with first match and then next x dirEntries thereafter
var saveStartIndex = startIndex;
@ -301,14 +298,15 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1;
});
}, true).then(function(firstIndex) {
var dirEntries = [];
var vDirEntries = [];
var addDirEntries = function(index, lastTitle) {
if (search.status === 'cancelled' || index >= firstIndex + searchWindow || index >= articleCount
|| lastTitle && !~lastTitle.indexOf(prefix) || search.found >= search.resultSize) {
var cnt = index - firstIndex;
if (cnt) console.debug('Scanned ' + cnt + ' titles for "' + prefix + '"');
if (search.status === 'cancelled' || search.found >= search.size || index >= articleCount
|| lastTitle && !~lastTitle.indexOf(prefix) || index - firstIndex >= search.window) {
// DEV: Diagnostics to be removed before merge
if (vDirEntries.length) console.debug('Scanned ' + (index - firstIndex) + ' titles for "' + prefix +
'" (found ' + vDirEntries.length + ' match' + (vDirEntries.length === 1 ? ')' : 'es)'));
return {
'dirEntries': dirEntries,
'dirEntries': vDirEntries,
'nextStart': index
};
}
@ -317,7 +315,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
// Only return dirEntries with titles that actually begin with prefix
if (saveStartIndex === null || dirEntry.namespace === cns && title.indexOf(prefix) === 0) {
if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title)) {
dirEntries.push(dirEntry);
vDirEntries.push(dirEntry);
// Report interim result
if (typeof saveStartIndex === 'undefined') callback([dirEntry], index, true);
}