From a8afc79971b6a1300a9e96c0024cec803a9f716f Mon Sep 17 00:00:00 2001 From: Jaifroid Date: Mon, 12 Jul 2021 21:29:13 +0100 Subject: [PATCH] Adopt search efficiency gains and refactor from Kiwix JS Former-commit-id: b72adf6609d0166a771e86e5d01a5723ecd0187e [formerly c6729c6f6140e1c5519d31c21eadb248df96f8c6] [formerly 2f0ca94f9fdb8de39b0ee7b1946d8359d193a028] [formerly f1ee7f5c74cc48c706e083c04643b5e94ab8f590 [formerly 468aad126122372266a7d533bf1243ef02251edd [formerly 1129bbc2afaf7b997cb55b9f38de81f03f03d553]]] Former-commit-id: 52b4132fe79d74f9e5059075a8c63f386896adec [formerly 414381fcd1a3bb9cfffb5bbd579176cdf456c846 [formerly b01f2b45e19f1e52bb8f37edafd9d28dc8329c60]] Former-commit-id: 2dfb4b4895dffc667650fdfc3660dbb9acb870dd [formerly b05cc40ee28febbbd18e7790b18a4d5583eae89c] Former-commit-id: 6cced6624083139724f9c75b461a8da014c73f9c --- www/js/app.js | 8 ++++---- www/js/lib/zimArchive.js | 28 +++++++++++++--------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/www/js/app.js b/www/js/app.js index 178fd136..b39adbc1 100644 --- a/www/js/app.js +++ b/www/js/app.js @@ -2810,7 +2810,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett // Cancel the old search (zimArchive search object will receive this change) appstate.search.status = 'cancelled'; // Initiate a new search object and point appstate.search to it (the zimAcrhive search object will continue to point to the old object) - appstate.search = {'prefix': prefix, 'status': 'init', 'type': ''}; + appstate.search = {'prefix': prefix, 'status': 'init', 'type': '', 'size': params.maxResults}; $('#activeContent').hide(); if (!prefix || /^\s/.test(prefix)) { var sel = prefix ? prefix.replace(/^\s(.*)/, '$1') : ''; @@ -2821,7 +2821,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett } showZIMIndex(null, sel); } else { - appstate.selectedArchive.findDirEntriesWithPrefix(appstate.search, params.maxResults, populateListOfArticles); + appstate.selectedArchive.findDirEntriesWithPrefix(appstate.search, populateListOfArticles); } } else { $('#searchingArticles').hide(); @@ -2844,9 +2844,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett } else { prefix = start > 0 ? '' : prefix; } - var search = {'prefix': prefix, 'state': ''}; // Dummy search object because expected by callee + var search = {'prefix': prefix, 'state': '', 'size': params.maxResults, 'window': params.maxResults}; if (appstate.selectedArchive !== null && appstate.selectedArchive.isReady()) { - appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(prefix, params.maxResults, search, function (dirEntryArray, nextStart) { + appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(prefix, search, function (dirEntryArray, nextStart) { var docBody = document.getElementById('largeModal'); var newHtml = ""; for (var i = 0; i < dirEntryArray.length; i++) { diff --git a/www/js/lib/zimArchive.js b/www/js/lib/zimArchive.js index a0f64aae..22704095 100644 --- a/www/js/lib/zimArchive.js +++ b/www/js/lib/zimArchive.js @@ -171,11 +171,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], * See https://phabricator.wikimedia.org/T108536 * * @param {Object} search The current appstate.search object - * @param {Integer} resultSize The number of dirEntries to find * @param {callbackDirEntryList} callback The function to call with the result * @param {Boolean} noInterim A flag to prevent callback until all results are ready (used in testing) */ - ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, resultSize, callback, noInterim) { + ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noInterim) { var that = this; // Establish array of initial values that must be searched first. All of these patterns are generated by the full // search type, and some by basic, but we need the most common patterns to be searched first, as it returns search @@ -219,7 +218,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], function searchNextVariant() { // If user has initiated a new search, cancel this one if (search.status === 'cancelled') return callback([], search.status); - if (prefixVariants.length === 0 || dirEntries.length >= resultSize) { + if (prefixVariants.length === 0 || dirEntries.length >= search.size) { search.status = 'complete'; return callback(dirEntries, search.status); } @@ -229,10 +228,9 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], search.found = dirEntries.length; var prefix = prefixVariants[0]; prefixVariants = prefixVariants.slice(1); - search.resultSize = resultSize - dirEntries.length; // Search window sets an upper limit on how many matching dirEntries will be scanned in a full index search - var searchWindow = search.rgxPrefix ? 10000 * resultSize : resultSize; - that.findDirEntriesWithPrefixCaseSensitive(prefix, searchWindow, search, + search.window = search.rgxPrefix ? 10000 * search.size : search.size; + that.findDirEntriesWithPrefixCaseSensitive(prefix, search, function (newDirEntries, idx, interim) { if (search.status === 'cancelled') return callback([], search.status); if (interim) {// Only push interim results (else results will be pushed again at end of variant loop) @@ -274,12 +272,11 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], * Look for dirEntries with title starting with the given prefix (case-sensitive) * * @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared - * @param {Integer} searchWindow The maximum number of dirEntries to scan in a single variant pass * @param {Object} search The appstate.search object (for comparison, so that we can cancel long binary searches) * @param {Function} callback The function to call with the array of dirEntries with titles that begin with prefix * @param {Integer} startIndex The index number with which to commence the search, or null */ - ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, searchWindow, search, callback, startIndex) { + ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, search, callback, startIndex) { // Save the value of startIndex because value of null has a special meaning in combination with prefix: // produces a list of matches starting with first match and then next x dirEntries thereafter var saveStartIndex = startIndex; @@ -301,14 +298,15 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1; }); }, true).then(function(firstIndex) { - var dirEntries = []; + var vDirEntries = []; var addDirEntries = function(index, lastTitle) { - if (search.status === 'cancelled' || index >= firstIndex + searchWindow || index >= articleCount - || lastTitle && !~lastTitle.indexOf(prefix) || search.found >= search.resultSize) { - var cnt = index - firstIndex; - if (cnt) console.debug('Scanned ' + cnt + ' titles for "' + prefix + '"'); + if (search.status === 'cancelled' || search.found >= search.size || index >= articleCount + || lastTitle && !~lastTitle.indexOf(prefix) || index - firstIndex >= search.window) { + // DEV: Diagnostics to be removed before merge + if (vDirEntries.length) console.debug('Scanned ' + (index - firstIndex) + ' titles for "' + prefix + + '" (found ' + vDirEntries.length + ' match' + (vDirEntries.length === 1 ? ')' : 'es)')); return { - 'dirEntries': dirEntries, + 'dirEntries': vDirEntries, 'nextStart': index }; } @@ -317,7 +315,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], // Only return dirEntries with titles that actually begin with prefix if (saveStartIndex === null || dirEntry.namespace === cns && title.indexOf(prefix) === 0) { if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title)) { - dirEntries.push(dirEntry); + vDirEntries.push(dirEntry); // Report interim result if (typeof saveStartIndex === 'undefined') callback([dirEntry], index, true); }