mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-24 04:54:51 -04:00
Optimize title search by removing redundant cycles (#733)
Fixes #696 (now gets up to 100 search results) and optimizes the code for #637.
This commit is contained in:
parent
c82b4398da
commit
5270bcf155
@ -177,7 +177,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
|
||||
assert.equal(firstDirEntry.getTitleOrUrl() , 'A Fool for You', 'First result should be "A Fool for You"');
|
||||
done();
|
||||
};
|
||||
localZimArchive.findDirEntriesWithPrefix({prefix: 'A'}, 5, callbackFunction, true);
|
||||
localZimArchive.findDirEntriesWithPrefix({prefix: 'A', size: 5}, callbackFunction, true);
|
||||
});
|
||||
QUnit.test("check findDirEntriesWithPrefix 'a'", function(assert) {
|
||||
var done = assert.async();
|
||||
@ -188,7 +188,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
|
||||
assert.equal(firstDirEntry.getTitleOrUrl() , 'A Fool for You', 'First result should be "A Fool for You"');
|
||||
done();
|
||||
};
|
||||
localZimArchive.findDirEntriesWithPrefix({prefix: 'a'}, 5, callbackFunction, true);
|
||||
localZimArchive.findDirEntriesWithPrefix({prefix: 'a', size: 5}, callbackFunction, true);
|
||||
});
|
||||
QUnit.test("check findDirEntriesWithPrefix 'blues brothers'", function(assert) {
|
||||
var done = assert.async();
|
||||
@ -199,7 +199,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
|
||||
assert.equal(firstDirEntry.getTitleOrUrl() , 'Blues Brothers (film)', 'First result should be "Blues Brothers (film)"');
|
||||
done();
|
||||
};
|
||||
localZimArchive.findDirEntriesWithPrefix({prefix: 'blues brothers'}, 5, callbackFunction, true);
|
||||
localZimArchive.findDirEntriesWithPrefix({prefix: 'blues brothers', size: 5}, callbackFunction, true);
|
||||
});
|
||||
QUnit.test("article '(The Night Time Is) The Right Time' correctly redirects to 'Night Time Is the Right Time'", function(assert) {
|
||||
var done = assert.async();
|
||||
|
@ -103,6 +103,7 @@
|
||||
<ul>
|
||||
<li><a href="#aboutApp">About this App</a></li>
|
||||
<li><a href="#usage">Usage</a></li>
|
||||
<li><a href="#searchSyntax">Title Search Usage</a></li>
|
||||
<li><a href="#privacy">Privacy Policy</a></li>
|
||||
<li><a href="#expert">Expert Settings and Technical Information</a>
|
||||
<ul>
|
||||
@ -189,6 +190,25 @@
|
||||
<h4>Step 4: Enjoy your offline content!</h4>
|
||||
<p style="text-align: right"><a href="#contents">↑ Back to Contents</a></p>
|
||||
|
||||
<h3 id="searchSyntax">Title search usage</h3>
|
||||
<p>
|
||||
Title search matches the <b>start</b> of an article title, so if you search for 'France' you will get 'France (country)', 'France (disambiguation)',
|
||||
'France (film)', etc. This kind of search tries to be <b>case-insensitive</b>, but the number of case variants tried is necessarily limited: e.g. if you
|
||||
search for 'unesco world heritiage', it will try 'Unesco world heritage', 'UNESCO world heritage', 'unesco World heritage', 'Unesco WORLD heritage'
|
||||
(etc.), but it won't try 'uNesCO' and other non-obvious case combinations. If you want to search for 'uNesCO', then you must type it exactly like that.
|
||||
</p>
|
||||
<p>
|
||||
If not enough results are returned, you can increase the maximum number of search results using the slider in Configuration. Please note that we are
|
||||
currently unable to support full text search of the contents of articles and titles, though we have plans to do so in the future if it becomes technically
|
||||
possible.
|
||||
</p>
|
||||
<p>
|
||||
<b>Alphabetical search:</b> If you type a letter of the alphabet in the title search box (upper case may be most useful), it will show an alphabetical
|
||||
list of articles starting with that letter (up to the maximum number of search results you selected in Configuration). This is useful for ZIM archives
|
||||
that have descriptive rather than semantic titles (e.g. TED Talks), expecially in <a href="#modes">JQuery mode</a>.
|
||||
</p>
|
||||
<p style="text-align: right"><a href="#contents">↑ Back to Contents</a></p>
|
||||
|
||||
<h3 id="privacy">Privacy policy</h3>
|
||||
|
||||
<h4>Short version:</h4>
|
||||
@ -489,7 +509,7 @@
|
||||
<div class="col-sm-8">
|
||||
<div class="range">
|
||||
<label>
|
||||
<input type="range" class="form-control-range" min="5" max="50" id="titleSearchRange" value="25">
|
||||
<input type="range" class="form-control-range" min="5" max="100" id="titleSearchRange" value="25">
|
||||
<span>Value: <span id="titleSearchRangeVal"></span> (<i>default 25, higher values increase search time</i>)</span>
|
||||
</label>
|
||||
</div>
|
||||
|
@ -1032,9 +1032,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
|
||||
// DEV: Technical explanation: the appstate.search is a pointer to an underlying object assigned in memory, and we are here defining a new object
|
||||
// in memory {'prefix': prefix, 'status': 'init', .....}, and pointing appstate.search to it; the old search object that was passed to selectedArchive
|
||||
// (zimArchive.js) continues to exist in the scope of the functions initiated by the previous search until all Promises have returned
|
||||
appstate.search = {'prefix': prefix, 'status': 'init', 'type': ''};
|
||||
appstate.search = {'prefix': prefix, 'status': 'init', 'type': '', 'size': params.maxSearchResultsSize};
|
||||
$('#activeContent').hide();
|
||||
selectedArchive.findDirEntriesWithPrefix(appstate.search, params.maxSearchResultsSize, populateListOfArticles);
|
||||
selectedArchive.findDirEntriesWithPrefix(appstate.search, populateListOfArticles);
|
||||
} else {
|
||||
$('#searchingArticles').hide();
|
||||
// We have to remove the focus from the search field,
|
||||
|
@ -171,11 +171,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
|
||||
* See https://phabricator.wikimedia.org/T108536
|
||||
*
|
||||
* @param {Object} search The current appstate.search object
|
||||
* @param {Integer} resultSize The number of dirEntries to find
|
||||
* @param {callbackDirEntryList} callback The function to call with the result
|
||||
* @param {Boolean} noInterim A flag to prevent callback until all results are ready (used in testing)
|
||||
*/
|
||||
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, resultSize, callback, noInterim) {
|
||||
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noInterim) {
|
||||
var that = this;
|
||||
// Establish array of initial values that must be searched first. All of these patterns are generated by the full
|
||||
// search type, and some by basic, but we need the most common patterns to be searched first, as it returns search
|
||||
@ -193,7 +192,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
|
||||
// Get the full array of combinations to check number of combinations
|
||||
var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full'));
|
||||
// Put cap on exponential number of combinations (five words = 3^5 = 243 combinations)
|
||||
search.type = fullCombos.length < 200 ? 'full' : 'basic';
|
||||
search.type = fullCombos.length < 300 ? 'full' : 'basic';
|
||||
// We have to remove duplicate string combinations because util.allCaseFirstLetters() can return some combinations
|
||||
// where uppercase and lowercase combinations are exactly the same, e.g. where prefix begins with punctuation
|
||||
// or currency signs, for languages without case, or where user-entered case duplicates calculated case
|
||||
@ -210,20 +209,22 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
|
||||
function searchNextVariant() {
|
||||
// If user has initiated a new search, cancel this one
|
||||
if (search.status === 'cancelled') return callback([], search);
|
||||
if (prefixVariants.length === 0 || dirEntries.length >= resultSize) {
|
||||
if (prefixVariants.length === 0 || dirEntries.length >= search.size) {
|
||||
search.status = 'complete';
|
||||
return callback(dirEntries, search);
|
||||
}
|
||||
// Dynamically populate list of articles
|
||||
search.status = 'interim';
|
||||
if (!noInterim) callback(dirEntries, search);
|
||||
search.found = dirEntries.length;
|
||||
var prefix = prefixVariants[0];
|
||||
prefixVariants = prefixVariants.slice(1);
|
||||
that.findDirEntriesWithPrefixCaseSensitive(prefix, resultSize - dirEntries.length, search,
|
||||
that.findDirEntriesWithPrefixCaseSensitive(prefix, search,
|
||||
function (newDirEntries, interim) {
|
||||
if (search.status === 'cancelled') return callback([], search);
|
||||
if (interim) {// Only push interim results (else results will be pushed again at end of variant loop)
|
||||
[].push.apply(dirEntries, newDirEntries);
|
||||
search.found = dirEntries.length;
|
||||
if (!noInterim && newDirEntries.length) callback(dirEntries, search);
|
||||
} else searchNextVariant();
|
||||
}
|
||||
@ -260,11 +261,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
|
||||
* Look for dirEntries with title starting with the given prefix (case-sensitive)
|
||||
*
|
||||
* @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared
|
||||
* @param {Integer} resultSize The maximum number of results to return
|
||||
* @param {Object} search The appstate.search object (for comparison, so that we can cancel long binary searches)
|
||||
* @param {callbackDirEntryList} callback The function to call with the array of dirEntries with titles that begin with prefix
|
||||
*/
|
||||
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, resultSize, search, callback) {
|
||||
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, search, callback) {
|
||||
var that = this;
|
||||
var cns = this.getContentNamespace();
|
||||
// Search v1 article listing if available, otherwise fallback to v0
|
||||
@ -281,20 +281,24 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
|
||||
return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1;
|
||||
});
|
||||
}, true).then(function(firstIndex) {
|
||||
var dirEntries = [];
|
||||
var addDirEntries = function(index) {
|
||||
if (search.status === 'cancelled' || index >= firstIndex + resultSize || index >= articleCount) {
|
||||
return dirEntries;
|
||||
var vDirEntries = [];
|
||||
var addDirEntries = function(index, lastTitle) {
|
||||
if (search.status === 'cancelled' || search.found >= search.size || index >= articleCount
|
||||
|| lastTitle && !~lastTitle.indexOf(prefix)) {
|
||||
// DEV: Diagnostics to be removed before merge
|
||||
if (vDirEntries.length) console.debug('Scanned ' + (index - firstIndex) + ' titles for "' + prefix +
|
||||
'" (found ' + vDirEntries.length + ' match' + (vDirEntries.length === 1 ? ')' : 'es)'));
|
||||
return vDirEntries;
|
||||
}
|
||||
return that._file.dirEntryByTitleIndex(index).then(function(dirEntry) {
|
||||
var title = dirEntry.getTitleOrUrl();
|
||||
// Only return dirEntries with titles that actually begin with prefix
|
||||
if (dirEntry.namespace === cns && title.indexOf(prefix) === 0) {
|
||||
dirEntries.push(dirEntry);
|
||||
vDirEntries.push(dirEntry);
|
||||
// Report interim result
|
||||
callback([dirEntry], true);
|
||||
}
|
||||
return addDirEntries(index + 1);
|
||||
return addDirEntries(index + 1, title);
|
||||
});
|
||||
};
|
||||
return addDirEntries(firstIndex);
|
||||
|
Loading…
x
Reference in New Issue
Block a user