mirror of
https://github.com/kiwix/kiwix-js-pwa.git
synced 2025-09-08 11:48:26 -04:00
Include fuzzy search for Zimit articles and title search (#379)
This commit is contained in:
parent
886c7183bb
commit
d62cea58c8
@ -1,5 +1,9 @@
|
||||
# Changelog
|
||||
|
||||
## In-progress release 2.4.1
|
||||
|
||||
* ENHANCEMENT: Provide fuzzy search for case-insensitive links in Zimit archives
|
||||
|
||||
## Release 2.4.0
|
||||
|
||||
* FEATURE: Support Full Screen (all browsers) and rotation lock (primarily intended for mobile)
|
||||
|
@ -106,6 +106,7 @@
|
||||
<div id="update" class="update">
|
||||
<h3 style="margin-top:0;">Changes in version <span class="version">2.0</span></h3>
|
||||
<ul style="padding-left: 15px;">
|
||||
<li>Provide fuzzy search for case-insensitive links in Zimit archives</li>
|
||||
<li>Support Full Screen (all browsers) and rotation lock (primarily intended for mobile)</li>
|
||||
<li>Significant speed-up of access to Wikimedia archives with option to ignore unneeded JS files</li>
|
||||
<li>Added sandbox attribute to iframe to block top-level navigation and attempts by scripts to "phone home"</li>
|
||||
|
@ -65,7 +65,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
||||
// Test caching capability
|
||||
cache.test(function(){});
|
||||
// Unique identifier of the article expected to be displayed
|
||||
var expectedArticleURLToBeDisplayed = "";
|
||||
appstate.expectedArticleURLToBeDisplayed = '';
|
||||
// Check if we have managed to switch to PWA mode (if running UWP app)
|
||||
// DEV: we do this in init.js, but sometimes it doesn't seem to register, so we do it again once the app has fully launched
|
||||
if (/UWP\|PWA/.test(params.appType) && /^http/i.test(window.location.protocol)) {
|
||||
@ -3992,9 +3992,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
||||
*/
|
||||
function isDirEntryExpectedToBeDisplayed(dirEntry) {
|
||||
var curArticleURL = dirEntry.namespace + "/" + dirEntry.url;
|
||||
if (expectedArticleURLToBeDisplayed !== curArticleURL) {
|
||||
if (appstate.expectedArticleURLToBeDisplayed !== curArticleURL) {
|
||||
console.debug("url of current article :" + curArticleURL + ", does not match the expected url :" +
|
||||
expectedArticleURLToBeDisplayed);
|
||||
appstate.expectedArticleURLToBeDisplayed);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -4008,7 +4008,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
||||
// Reset search prefix to allow users to search the same string again if they want to
|
||||
appstate.search.prefix = '';
|
||||
// Only update for expectedArticleURLToBeDisplayed.
|
||||
expectedArticleURLToBeDisplayed = dirEntry.namespace + "/" + dirEntry.url;
|
||||
appstate.expectedArticleURLToBeDisplayed = dirEntry.namespace + '/' + dirEntry.url;
|
||||
params.pagesLoaded++;
|
||||
if (dirEntry.isRedirect()) {
|
||||
appstate.selectedArchive.resolveRedirect(dirEntry, readArticle);
|
||||
@ -5704,6 +5704,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
||||
* @param {String} pathEnc The fully encoded version of the path for use with some Zimit archives
|
||||
*/
|
||||
function goToArticle(path, download, contentType, pathEnc) {
|
||||
appstate.expectedArticleURLToBeDisplayed = path;
|
||||
//This removes any search highlighting
|
||||
clearFindInArticle();
|
||||
var shortTitle = path.replace(/[^/]+\//g, '').substring(0, 18);
|
||||
|
@ -172,7 +172,7 @@ define(rqDef, function(util) {
|
||||
}
|
||||
}
|
||||
|
||||
function pollSpinner(msg) {
|
||||
function pollSpinner(msg, noTimeout) {
|
||||
msg = msg || '';
|
||||
document.getElementById('searchingArticles').style.display = 'block';
|
||||
var cachingAssets = document.getElementById('cachingAssets');
|
||||
@ -181,7 +181,7 @@ define(rqDef, function(util) {
|
||||
else cachingAssets.style.display = 'none';
|
||||
// Never allow spinner to run for more than 3s
|
||||
clearTimeout(clearSpinner);
|
||||
setTimeout(clearSpinner, 3000);
|
||||
if (!noTimeout) setTimeout(clearSpinner, 3000);
|
||||
}
|
||||
|
||||
function clearSpinner() {
|
||||
|
@ -290,7 +290,7 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
var rgxSplitPrefix = /^[-ABCHIJMUVWX]\//;
|
||||
if (that._file.zimType === 'zimit' && cns === 'C') {
|
||||
// We have to account for the Zimit prefix in Type 1 ZIMs
|
||||
rgxSplitPrefix = /^[CMWX]\/(?:[AH]\/)?/;
|
||||
rgxSplitPrefix = /^(?:[CMWX]\/)?(?:[AH]\/)?/;
|
||||
}
|
||||
var splitPrefix = prefix.match(rgxSplitPrefix);
|
||||
prefixNameSpaces = splitPrefix ? splitPrefix[0] : '';
|
||||
@ -305,6 +305,10 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
startArray.push(prefix.replace(/^./, function (m) {
|
||||
return m.toLocaleUpperCase();
|
||||
}));
|
||||
// Add pure lowercase string (rarer)
|
||||
startArray.push(prefix);
|
||||
// Add a case-insensitive search for the string (pseudo-regex notation)
|
||||
startArray.push('/' + prefix + '/i');
|
||||
// Get the full array of combinations to check number of combinations
|
||||
var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full'));
|
||||
// Put cap on exponential number of combinations (five words = 3^5 = 243 combinations)
|
||||
@ -343,7 +347,14 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
if (!noInterim) callback(dirEntries, search);
|
||||
search.found = dirEntries.length;
|
||||
var prefix = prefixNameSpaces + prefixVariants[0];
|
||||
// console.debug('Searching for: ' + prefixVariants[0]);
|
||||
search.lc = false;
|
||||
// If it's pseudo-regex with a case-insensitive flag like '/my search/i', do an enhanced case-insensitive search
|
||||
if (/^\/.+\/i$/.test(prefixVariants[0])) {
|
||||
search.lc = true;
|
||||
prefix = prefixNameSpaces + prefixVariants[0].replace(/^\/(.+)\/i/, '$1').toLocaleLowerCase();
|
||||
console.debug('Searching case-insensitively for: "' + prefix + '"');
|
||||
}
|
||||
// Remove in-progress search variant from array
|
||||
prefixVariants = prefixVariants.slice(1);
|
||||
// Search window sets an upper limit on how many matching dirEntries will be scanned in a full index search
|
||||
search.window = search.rgxPrefix ? 10000 * search.size : search.size;
|
||||
@ -352,8 +363,18 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
search.countReport = countReport;
|
||||
if (search.status === 'cancelled') return callback([], search);
|
||||
if (!noInterim && countReport === true) return callback(dirEntries, search);
|
||||
if (interim) {// Only push interim results (else results will be pushed again at end of variant loop)
|
||||
[].push.apply(dirEntries, newDirEntries);
|
||||
// Only push interim results to the dirEntries array (otherwise we get a duplicated array when the final results are reported to this function)
|
||||
if (interim) {
|
||||
// Collect all the found paths for the dirEntries so far
|
||||
var dirEntryPaths = [];
|
||||
for (var i = 0; i < dirEntries.length; i++) {
|
||||
dirEntryPaths.push(dirEntries[i].url);
|
||||
}
|
||||
// Push new directory entries to the end of the global array so long as they are not duplicates
|
||||
for (var j = 0; j < newDirEntries.length; j++) {
|
||||
if (~dirEntryPaths.indexOf(newDirEntries[j].url)) continue;
|
||||
dirEntries.push(newDirEntries[j]);
|
||||
}
|
||||
search.found = dirEntries.length;
|
||||
if (!noInterim && newDirEntries.length) return callback(dirEntries, search);
|
||||
} else return searchNextVariant();
|
||||
@ -420,9 +441,18 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
if (ns < cns) return 1;
|
||||
if (ns > cns) return -1;
|
||||
// We should now be in namespace A (old format ZIM) or C (new format ZIM)
|
||||
return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1;
|
||||
if (search.lc) { // Search comparator should be lowercase (for case-insensitive search)
|
||||
ti = ti.toLocaleLowerCase();
|
||||
prefix = prefix.toLocaleLowerCase();
|
||||
}
|
||||
return prefix <= ti ? -1 : 1;
|
||||
} else {
|
||||
return prefix <= ns + '/' + ti ? -1 : 1;
|
||||
if (search.lc) { // Search comparator should be lowercase (for case-insensitive search)
|
||||
ns = ns + '/' + ti.replace(/^((?:[AH])?)\/?.*/, '$1');
|
||||
ti = ti.replace(/^[AH]\//, '').toLocaleLowerCase();
|
||||
}
|
||||
// if (search.rgxPrefix && search.rgxPrefix.test(ti)) return -1;
|
||||
return prefix <= (ns + '/' + ti) ? -1 : 1;
|
||||
}
|
||||
});
|
||||
}, true).then(function(firstIndex) {
|
||||
@ -445,9 +475,13 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
var title = dirEntry.getTitleOrUrl();
|
||||
// If we are searching by URL, display namespace also
|
||||
if (search.searchUrlIndex) title = dirEntry.namespace + '/' + dirEntry.url;
|
||||
if (search.lc && !search.rgxPrefix) { // Search comparator should be lowercase if not using regex (for case-insensitive search)
|
||||
var ns = title.replace(/^((?:C\/)?(?:[AH]\/)?).*/, '$1');
|
||||
title = ns + title.replace(ns, '').toLocaleLowerCase();
|
||||
}
|
||||
// Only return dirEntries with titles that actually begin with prefix
|
||||
if (saveStartIndex === null || (search.searchUrlIndex || dirEntry.namespace === cns) && title.indexOf(prefix) === 0) {
|
||||
if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title.replace(prefix, ''))) {
|
||||
if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title)) { // Regex test case-insensitive if i flag set
|
||||
vDirEntries.push(dirEntry);
|
||||
// Report interim result
|
||||
if (typeof saveStartIndex === 'undefined') callback([dirEntry], false, true);
|
||||
@ -610,6 +644,9 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
}
|
||||
callback(dirEntry, data);
|
||||
}
|
||||
}).catch(function (e) {
|
||||
console.error('Error reading directory entry', e);
|
||||
callback(dirEntry, '');
|
||||
});
|
||||
};
|
||||
|
||||
@ -681,11 +718,30 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
path = path.replace(/^A\//, 'H/').replace(/^(C\/)A\//, '$1H/');
|
||||
console.debug('DirEntry ' + oldPath + ' not found, looking up header: ' + path);
|
||||
return that.getDirEntryByPath(path, true, oldPath);
|
||||
// } else if (zimitResolving) {
|
||||
} else if (zimitResolving && appstate.originalPath && appstate.originalPath === appstate.expectedArticleURLToBeDisplayed) {
|
||||
// We couldn't find the Header, so try a fuzzy search only if the user is loading an article
|
||||
path = appstate.originalPath;
|
||||
var ns = path.replace(/^((?:C\/)?A\/).*/, '$1'); // If Zimit pseudo-namespaces are changed, will need to edit this
|
||||
path = path.replace(ns, '');
|
||||
path = path.toLocaleLowerCase(); // We are going to combine case-insensitive string comparison with regex matching
|
||||
var rgxPath = path.replace(/([-/?.$^|*+()[{])/g, '\\$1'); // Make sure we escape regex characters
|
||||
path = ns + path; // Add namespace back to path for full matching
|
||||
// path = ns;
|
||||
var search = {
|
||||
rgxPrefix: new RegExp('.*' + rgxPath, 'i'),
|
||||
searchUrlIndex: true,
|
||||
lc: true, // Make the comparator (e.g. dirEntry.url) lowercase
|
||||
size: 1,
|
||||
found: 0
|
||||
}
|
||||
return fuzzySearch(path, search);
|
||||
} else {
|
||||
var newpath = path.replace(/^((?:A|C\/A)\/)[^/]+\/(.+)$/, '$1$2');
|
||||
if (newpath === path) return null; // No further paths to explore!
|
||||
console.log("Article " + path + " not available, but moving up one directory to compensate for ZIM coding error...");
|
||||
return that.getDirEntryByPath(newpath);
|
||||
}
|
||||
var newpath = path.replace(/^((?:A|C\/A)\/)[^/]+\/(.+)$/, '$1$2');
|
||||
if (newpath === path) return null; // No further paths to explore!
|
||||
console.log("Article " + path + " not available, but moving up one directory to compensate for ZIM coding error...");
|
||||
return that.getDirEntryByPath(newpath);
|
||||
} else {
|
||||
// DEBUG: List found Directory Entry
|
||||
// if (dirEntry) console.debug('Found ' + path);
|
||||
@ -694,6 +750,37 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Initiate a fuzzy search for dirEntries matching the search object
 *
 * The archive search reports results through a callback that may be invoked more than once
 * (interim results followed by further reports). Only the first invocation decides the outcome
 * of the returned Promise; any later invocation is ignored.
 *
 * @param {String} path Human-readable path to search for
 * @param {Object} search The search object; its 'found' counter is incremented when a match is accepted
 * @returns {Promise<DirEntry>} A Promise that resolves to a Directory Entry, or null if not found
 */
function fuzzySearch(path, search) {
    return new Promise(function (resolve) {
        console.log('Initiating fuzzy search for ' + path + '...');
        // Second argument (noTimeout) stops pollSpinner from scheduling its automatic 3s clear
        uiUtil.pollSpinner('Fuzzy search for ' + path + '...', true);
        // Guards against repeat invocations of the search callback once the Promise is settled
        var searchResolved = false;
        // NOTE(review): the meaning of the trailing null argument is not visible here - confirm against the callee's signature
        appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(path, search, function (dirEntries) {
            // A Promise settles only once: without this guard, follow-up reports would log a
            // misleading "no results" message even though a match was already returned
            if (searchResolved) return;
            searchResolved = true;
            var dirEntry = null;
            if (!search.found && dirEntries && dirEntries[0] && dirEntries[0].url) {
                search.found++;
                // The filter's result is used as-is, so a falsy return value resolves the search as "not found"
                dirEntry = transformZimit.filterReplayFiles(dirEntries[0]);
                if (dirEntry) console.debug('Found ' + dirEntry.url + ' in fuzzy search');
            } else {
                console.debug('No fuzzy search results found');
            }
            resolve(dirEntry);
        }, null);
    });
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {callbackDirEntry} callback
|
||||
|
Loading…
x
Reference in New Issue
Block a user