mirror of
https://github.com/kiwix/kiwix-js-pwa.git
synced 2025-09-10 12:53:54 -04:00
Include fuzzy search for Zimit articles and title search (#379)
This commit is contained in:
parent
886c7183bb
commit
d62cea58c8
@ -1,5 +1,9 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## In-progress release 2.4.1
|
||||||
|
|
||||||
|
* ENHANCEMENT: Provide fuzzy search for case-insensitive links in Zimit archives
|
||||||
|
|
||||||
## Release 2.4.0
|
## Release 2.4.0
|
||||||
|
|
||||||
* FEATURE: Support Full Screen (all browsers) and rotation lock (primarily intended for mobile)
|
* FEATURE: Support Full Screen (all browsers) and rotation lock (primarily intended for mobile)
|
||||||
|
@ -106,6 +106,7 @@
|
|||||||
<div id="update" class="update">
|
<div id="update" class="update">
|
||||||
<h3 style="margin-top:0;">Changes in version <span class="version">2.0</span></h3>
|
<h3 style="margin-top:0;">Changes in version <span class="version">2.0</span></h3>
|
||||||
<ul style="padding-left: 15px;">
|
<ul style="padding-left: 15px;">
|
||||||
|
<li>Provide fuzzy search for case-insensitive links in Zimit archives</li>
|
||||||
<li>Support Full Screen (all browsers) and rotation lock (primarily intended for mobile)</li>
|
<li>Support Full Screen (all browsers) and rotation lock (primarily intended for mobile)</li>
|
||||||
<li>Significant speed-up of access to Wikimedia archives with option to ignore unneeded JS files</li>
|
<li>Significant speed-up of access to Wikimedia archives with option to ignore unneeded JS files</li>
|
||||||
<li>Added sandbox attribute to iframe to block top-level navigation and attempts by scripts to "phone home"</li>
|
<li>Added sandbox attribute to iframe to block top-level navigation and attempts by scripts to "phone home"</li>
|
||||||
|
@ -65,7 +65,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
|||||||
// Test caching capability
|
// Test caching capability
|
||||||
cache.test(function(){});
|
cache.test(function(){});
|
||||||
// Unique identifier of the article expected to be displayed
|
// Unique identifier of the article expected to be displayed
|
||||||
var expectedArticleURLToBeDisplayed = "";
|
appstate.expectedArticleURLToBeDisplayed = '';
|
||||||
// Check if we have managed to switch to PWA mode (if running UWP app)
|
// Check if we have managed to switch to PWA mode (if running UWP app)
|
||||||
// DEV: we do this in init.js, but sometimes it doesn't seem to register, so we do it again once the app has fully launched
|
// DEV: we do this in init.js, but sometimes it doesn't seem to register, so we do it again once the app has fully launched
|
||||||
if (/UWP\|PWA/.test(params.appType) && /^http/i.test(window.location.protocol)) {
|
if (/UWP\|PWA/.test(params.appType) && /^http/i.test(window.location.protocol)) {
|
||||||
@ -3992,9 +3992,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
|||||||
*/
|
*/
|
||||||
function isDirEntryExpectedToBeDisplayed(dirEntry) {
|
function isDirEntryExpectedToBeDisplayed(dirEntry) {
|
||||||
var curArticleURL = dirEntry.namespace + "/" + dirEntry.url;
|
var curArticleURL = dirEntry.namespace + "/" + dirEntry.url;
|
||||||
if (expectedArticleURLToBeDisplayed !== curArticleURL) {
|
if (appstate.expectedArticleURLToBeDisplayed !== curArticleURL) {
|
||||||
console.debug("url of current article :" + curArticleURL + ", does not match the expected url :" +
|
console.debug("url of current article :" + curArticleURL + ", does not match the expected url :" +
|
||||||
expectedArticleURLToBeDisplayed);
|
appstate.expectedArticleURLToBeDisplayed);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -4008,7 +4008,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
|||||||
// Reset search prefix to allow users to search the same string again if they want to
|
// Reset search prefix to allow users to search the same string again if they want to
|
||||||
appstate.search.prefix = '';
|
appstate.search.prefix = '';
|
||||||
// Only update for expectedArticleURLToBeDisplayed.
|
// Only update for expectedArticleURLToBeDisplayed.
|
||||||
expectedArticleURLToBeDisplayed = dirEntry.namespace + "/" + dirEntry.url;
|
appstate.expectedArticleURLToBeDisplayed = dirEntry.namespace + '/' + dirEntry.url;
|
||||||
params.pagesLoaded++;
|
params.pagesLoaded++;
|
||||||
if (dirEntry.isRedirect()) {
|
if (dirEntry.isRedirect()) {
|
||||||
appstate.selectedArchive.resolveRedirect(dirEntry, readArticle);
|
appstate.selectedArchive.resolveRedirect(dirEntry, readArticle);
|
||||||
@ -5704,6 +5704,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'utf8', 'cache', 'images
|
|||||||
* @param {String} pathEnc The fully encoded version of the path for use with some Zimit archives
|
* @param {String} pathEnc The fully encoded version of the path for use with some Zimit archives
|
||||||
*/
|
*/
|
||||||
function goToArticle(path, download, contentType, pathEnc) {
|
function goToArticle(path, download, contentType, pathEnc) {
|
||||||
|
appstate.expectedArticleURLToBeDisplayed = path;
|
||||||
//This removes any search highlighting
|
//This removes any search highlighting
|
||||||
clearFindInArticle();
|
clearFindInArticle();
|
||||||
var shortTitle = path.replace(/[^/]+\//g, '').substring(0, 18);
|
var shortTitle = path.replace(/[^/]+\//g, '').substring(0, 18);
|
||||||
|
@ -172,7 +172,7 @@ define(rqDef, function(util) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function pollSpinner(msg) {
|
function pollSpinner(msg, noTimeout) {
|
||||||
msg = msg || '';
|
msg = msg || '';
|
||||||
document.getElementById('searchingArticles').style.display = 'block';
|
document.getElementById('searchingArticles').style.display = 'block';
|
||||||
var cachingAssets = document.getElementById('cachingAssets');
|
var cachingAssets = document.getElementById('cachingAssets');
|
||||||
@ -181,7 +181,7 @@ define(rqDef, function(util) {
|
|||||||
else cachingAssets.style.display = 'none';
|
else cachingAssets.style.display = 'none';
|
||||||
// Never allow spinner to run for more than 3s
|
// Never allow spinner to run for more than 3s
|
||||||
clearTimeout(clearSpinner);
|
clearTimeout(clearSpinner);
|
||||||
setTimeout(clearSpinner, 3000);
|
if (!noTimeout) setTimeout(clearSpinner, 3000);
|
||||||
}
|
}
|
||||||
|
|
||||||
function clearSpinner() {
|
function clearSpinner() {
|
||||||
|
@ -290,7 +290,7 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
var rgxSplitPrefix = /^[-ABCHIJMUVWX]\//;
|
var rgxSplitPrefix = /^[-ABCHIJMUVWX]\//;
|
||||||
if (that._file.zimType === 'zimit' && cns === 'C') {
|
if (that._file.zimType === 'zimit' && cns === 'C') {
|
||||||
// We have to account for the Zimit prefix in Type 1 ZIMs
|
// We have to account for the Zimit prefix in Type 1 ZIMs
|
||||||
rgxSplitPrefix = /^[CMWX]\/(?:[AH]\/)?/;
|
rgxSplitPrefix = /^(?:[CMWX]\/)?(?:[AH]\/)?/;
|
||||||
}
|
}
|
||||||
var splitPrefix = prefix.match(rgxSplitPrefix);
|
var splitPrefix = prefix.match(rgxSplitPrefix);
|
||||||
prefixNameSpaces = splitPrefix ? splitPrefix[0] : '';
|
prefixNameSpaces = splitPrefix ? splitPrefix[0] : '';
|
||||||
@ -305,6 +305,10 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
startArray.push(prefix.replace(/^./, function (m) {
|
startArray.push(prefix.replace(/^./, function (m) {
|
||||||
return m.toLocaleUpperCase();
|
return m.toLocaleUpperCase();
|
||||||
}));
|
}));
|
||||||
|
// Add pure lowercase string (rarer)
|
||||||
|
startArray.push(prefix);
|
||||||
|
// Add a case-insensitive search for the string (pseudo-regex notation)
|
||||||
|
startArray.push('/' + prefix + '/i');
|
||||||
// Get the full array of combinations to check number of combinations
|
// Get the full array of combinations to check number of combinations
|
||||||
var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full'));
|
var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full'));
|
||||||
// Put cap on exponential number of combinations (five words = 3^5 = 243 combinations)
|
// Put cap on exponential number of combinations (five words = 3^5 = 243 combinations)
|
||||||
@ -343,7 +347,14 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
if (!noInterim) callback(dirEntries, search);
|
if (!noInterim) callback(dirEntries, search);
|
||||||
search.found = dirEntries.length;
|
search.found = dirEntries.length;
|
||||||
var prefix = prefixNameSpaces + prefixVariants[0];
|
var prefix = prefixNameSpaces + prefixVariants[0];
|
||||||
// console.debug('Searching for: ' + prefixVariants[0]);
|
search.lc = false;
|
||||||
|
// If it's pseudo-regex with a case-insensitive flag like '/my search/i', do an enhanced case-insensitive search
|
||||||
|
if (/^\/.+\/i$/.test(prefixVariants[0])) {
|
||||||
|
search.lc = true;
|
||||||
|
prefix = prefixNameSpaces + prefixVariants[0].replace(/^\/(.+)\/i/, '$1').toLocaleLowerCase();
|
||||||
|
console.debug('Searching case-insensitively for: "' + prefix + '"');
|
||||||
|
}
|
||||||
|
// Remove in-progress search variant from array
|
||||||
prefixVariants = prefixVariants.slice(1);
|
prefixVariants = prefixVariants.slice(1);
|
||||||
// Search window sets an upper limit on how many matching dirEntries will be scanned in a full index search
|
// Search window sets an upper limit on how many matching dirEntries will be scanned in a full index search
|
||||||
search.window = search.rgxPrefix ? 10000 * search.size : search.size;
|
search.window = search.rgxPrefix ? 10000 * search.size : search.size;
|
||||||
@ -352,8 +363,18 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
search.countReport = countReport;
|
search.countReport = countReport;
|
||||||
if (search.status === 'cancelled') return callback([], search);
|
if (search.status === 'cancelled') return callback([], search);
|
||||||
if (!noInterim && countReport === true) return callback(dirEntries, search);
|
if (!noInterim && countReport === true) return callback(dirEntries, search);
|
||||||
if (interim) {// Only push interim results (else results will be pushed again at end of variant loop)
|
// Only push interim results to the dirEntries array (otherwise we get a duplicated array when the final results are reported to this function)
|
||||||
[].push.apply(dirEntries, newDirEntries);
|
if (interim) {
|
||||||
|
// Collect all the found paths for the dirEntries so far
|
||||||
|
var dirEntryPaths = [];
|
||||||
|
for (var i = 0; i < dirEntries.length; i++) {
|
||||||
|
dirEntryPaths.push(dirEntries[i].url);
|
||||||
|
}
|
||||||
|
// Push new directory entries to the end of the global array so long as they are not duplicates
|
||||||
|
for (var j = 0; j < newDirEntries.length; j++) {
|
||||||
|
if (~dirEntryPaths.indexOf(newDirEntries[j].url)) continue;
|
||||||
|
dirEntries.push(newDirEntries[j]);
|
||||||
|
}
|
||||||
search.found = dirEntries.length;
|
search.found = dirEntries.length;
|
||||||
if (!noInterim && newDirEntries.length) return callback(dirEntries, search);
|
if (!noInterim && newDirEntries.length) return callback(dirEntries, search);
|
||||||
} else return searchNextVariant();
|
} else return searchNextVariant();
|
||||||
@ -420,9 +441,18 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
if (ns < cns) return 1;
|
if (ns < cns) return 1;
|
||||||
if (ns > cns) return -1;
|
if (ns > cns) return -1;
|
||||||
// We should now be in namespace A (old format ZIM) or C (new format ZIM)
|
// We should now be in namespace A (old format ZIM) or C (new format ZIM)
|
||||||
return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1;
|
if (search.lc) { // Search comparator should be lowercase (for case-insensitive search)
|
||||||
|
ti = ti.toLocaleLowerCase();
|
||||||
|
prefix = prefix.toLocaleLowerCase();
|
||||||
|
}
|
||||||
|
return prefix <= ti ? -1 : 1;
|
||||||
} else {
|
} else {
|
||||||
return prefix <= ns + '/' + ti ? -1 : 1;
|
if (search.lc) { // Search comparator should be lowercase (for case-insensitive search)
|
||||||
|
ns = ns + '/' + ti.replace(/^((?:[AH])?)\/?.*/, '$1');
|
||||||
|
ti = ti.replace(/^[AH]\//, '').toLocaleLowerCase();
|
||||||
|
}
|
||||||
|
// if (search.rgxPrefix && search.rgxPrefix.test(ti)) return -1;
|
||||||
|
return prefix <= (ns + '/' + ti) ? -1 : 1;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}, true).then(function(firstIndex) {
|
}, true).then(function(firstIndex) {
|
||||||
@ -445,9 +475,13 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
var title = dirEntry.getTitleOrUrl();
|
var title = dirEntry.getTitleOrUrl();
|
||||||
// If we are searching by URL, display namespace also
|
// If we are searching by URL, display namespace also
|
||||||
if (search.searchUrlIndex) title = dirEntry.namespace + '/' + dirEntry.url;
|
if (search.searchUrlIndex) title = dirEntry.namespace + '/' + dirEntry.url;
|
||||||
|
if (search.lc && !search.rgxPrefix) { // Search comparator should be lowercase if not using regex (for case-insensitive search)
|
||||||
|
var ns = title.replace(/^((?:C\/)?(?:[AH]\/)?).*/, '$1');
|
||||||
|
title = ns + title.replace(ns, '').toLocaleLowerCase();
|
||||||
|
}
|
||||||
// Only return dirEntries with titles that actually begin with prefix
|
// Only return dirEntries with titles that actually begin with prefix
|
||||||
if (saveStartIndex === null || (search.searchUrlIndex || dirEntry.namespace === cns) && title.indexOf(prefix) === 0) {
|
if (saveStartIndex === null || (search.searchUrlIndex || dirEntry.namespace === cns) && title.indexOf(prefix) === 0) {
|
||||||
if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title.replace(prefix, ''))) {
|
if (!search.rgxPrefix || search.rgxPrefix && search.rgxPrefix.test(title)) { // Regex test case-insensitive if i flag set
|
||||||
vDirEntries.push(dirEntry);
|
vDirEntries.push(dirEntry);
|
||||||
// Report interim result
|
// Report interim result
|
||||||
if (typeof saveStartIndex === 'undefined') callback([dirEntry], false, true);
|
if (typeof saveStartIndex === 'undefined') callback([dirEntry], false, true);
|
||||||
@ -610,6 +644,9 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
}
|
}
|
||||||
callback(dirEntry, data);
|
callback(dirEntry, data);
|
||||||
}
|
}
|
||||||
|
}).catch(function (e) {
|
||||||
|
console.error('Error reading directory entry', e);
|
||||||
|
callback(dirEntry, '');
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -681,11 +718,30 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
path = path.replace(/^A\//, 'H/').replace(/^(C\/)A\//, '$1H/');
|
path = path.replace(/^A\//, 'H/').replace(/^(C\/)A\//, '$1H/');
|
||||||
console.debug('DirEntry ' + oldPath + ' not found, looking up header: ' + path);
|
console.debug('DirEntry ' + oldPath + ' not found, looking up header: ' + path);
|
||||||
return that.getDirEntryByPath(path, true, oldPath);
|
return that.getDirEntryByPath(path, true, oldPath);
|
||||||
|
// } else if (zimitResolving) {
|
||||||
|
} else if (zimitResolving && appstate.originalPath && appstate.originalPath === appstate.expectedArticleURLToBeDisplayed) {
|
||||||
|
// We couldn't find the Header, so try a fuzzy search only if the user is loading an article
|
||||||
|
path = appstate.originalPath;
|
||||||
|
var ns = path.replace(/^((?:C\/)?A\/).*/, '$1'); // If Zimit pseudo-namespaces are changed, will need to edit this
|
||||||
|
path = path.replace(ns, '');
|
||||||
|
path = path.toLocaleLowerCase(); // We are going to combine case-insensitive string comparison with regex matching
|
||||||
|
var rgxPath = path.replace(/([-/?.$^|*+()[{])/g, '\\$1'); // Make sure we escape regex characters
|
||||||
|
path = ns + path; // Add namespace back to path for full matching
|
||||||
|
// path = ns;
|
||||||
|
var search = {
|
||||||
|
rgxPrefix: new RegExp('.*' + rgxPath, 'i'),
|
||||||
|
searchUrlIndex: true,
|
||||||
|
lc: true, // Make the comparator (e.g. dirEntry.url) lowercase
|
||||||
|
size: 1,
|
||||||
|
found: 0
|
||||||
|
}
|
||||||
|
return fuzzySearch(path, search);
|
||||||
|
} else {
|
||||||
|
var newpath = path.replace(/^((?:A|C\/A)\/)[^/]+\/(.+)$/, '$1$2');
|
||||||
|
if (newpath === path) return null; // No further paths to explore!
|
||||||
|
console.log("Article " + path + " not available, but moving up one directory to compensate for ZIM coding error...");
|
||||||
|
return that.getDirEntryByPath(newpath);
|
||||||
}
|
}
|
||||||
var newpath = path.replace(/^((?:A|C\/A)\/)[^/]+\/(.+)$/, '$1$2');
|
|
||||||
if (newpath === path) return null; // No further paths to explore!
|
|
||||||
console.log("Article " + path + " not available, but moving up one directory to compensate for ZIM coding error...");
|
|
||||||
return that.getDirEntryByPath(newpath);
|
|
||||||
} else {
|
} else {
|
||||||
// DEBUG: List found Directory Entry
|
// DEBUG: List found Directory Entry
|
||||||
// if (dirEntry) console.debug('Found ' + path);
|
// if (dirEntry) console.debug('Found ' + path);
|
||||||
@ -694,6 +750,37 @@ define(['zimfile', 'zimDirEntry', 'transformZimit', 'util', 'uiUtil', 'utf8'],
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initiate a fuzzy search for dirEntries matching the search object
|
||||||
|
* @param {String} path Human-readable path to search for
|
||||||
|
* @param {Object} search The search object
|
||||||
|
* @returns {Promise<DirEntry>} A Promise that resolves to a Directory Entry, or null if not found
|
||||||
|
*/
|
||||||
|
function fuzzySearch(path, search) {
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
console.log('Initiating fuzzy search for ' + path + '...');
|
||||||
|
uiUtil.pollSpinner('Fuzzy search for ' + path + '...', true);
|
||||||
|
var searchResolved = false;
|
||||||
|
// setTimeout(function () {
|
||||||
|
// if (!searchResolved) uiUtil.pollSpinner('Fuzzy search for ' + path + '...', true);
|
||||||
|
// }, 5000);
|
||||||
|
appstate.selectedArchive.findDirEntriesWithPrefixCaseSensitive(path, search, function (dirEntry) {
|
||||||
|
if (!search.found && dirEntry && dirEntry[0] && dirEntry[0].url) {
|
||||||
|
search.found++;
|
||||||
|
dirEntry = dirEntry[0];
|
||||||
|
dirEntry = transformZimit.filterReplayFiles(dirEntry);
|
||||||
|
if (dirEntry) console.debug('Found ' + dirEntry.url + ' in fuzzy search');
|
||||||
|
searchResolved = true;
|
||||||
|
resolve(dirEntry);
|
||||||
|
} else {
|
||||||
|
console.debug('No fuzzy search results found');
|
||||||
|
searchResolved = true;
|
||||||
|
resolve(null);
|
||||||
|
}
|
||||||
|
}, null);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param {callbackDirEntry} callback
|
* @param {callbackDirEntry} callback
|
||||||
|
Loading…
x
Reference in New Issue
Block a user