mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-24 04:54:51 -04:00
Merge pull request #160 from kiwix/issue124-title-search-with-case-variants
On title search, try some upper/lower case variants.
This commit is contained in:
commit
f6ea93903a
@ -377,6 +377,21 @@ define(['jquery', 'title', 'archive', 'zimArchive', 'zimDirEntry', 'util', 'geom
|
||||
var float = util.readFloatFrom4Bytes(byteArray, 0);
|
||||
equal(float, -118.625, "the IEEE_754 float should be converted as -118.625");
|
||||
});
|
||||
test("check upper/lower case variations", function() {
    // Each helper is checked on a literal input: accented first letters,
    // an already capitalised word, and a sentence mixing spaces and hyphens.
    equal(util.ucFirstLetter("téléphone"), "Téléphone", "The first letter should be upper-case");
    equal(util.lcFirstLetter("Paris"), "paris", "The first letter should be lower-case");
    equal(
        util.ucEveryFirstLetter("le Couvre-chef Est sur le porte-manteaux"),
        "Le Couvre-Chef Est Sur Le Porte-Manteaux",
        "The first letter of every word should be upper-case"
    );
    equal(util.ucFirstLetter("épée"), "Épée", "The first letter should be upper-case (with accent)");
});
|
||||
test("check remove duplicates of an array of title objects", function() {
    // Titles "a" and "c" each appear twice in the input
    var titlesWithDuplicates = [
        {title:"a"}, {title:"b"}, {title:"c"},
        {title:"a"}, {title:"c"}, {title:"d"}
    ];
    var uniqueTitles = [{title:"a"}, {title:"b"}, {title:"c"}, {title:"d"}];
    var actualTitles = util.removeDuplicateTitlesInArray(titlesWithDuplicates);
    deepEqual(actualTitles, uniqueTitles, "Duplicates should be removed from the array");
});
|
||||
|
||||
module("evopedia_articles_nearby");
|
||||
asyncTest("check articles found nearby France and Germany", function() {
|
||||
@ -498,9 +513,7 @@ define(['jquery', 'title', 'archive', 'zimArchive', 'zimDirEntry', 'util', 'geom
|
||||
|
||||
module("zim_title_search_and_read");
|
||||
asyncTest("check DirEntry.fromStringId 'A Fool for You'", function() {
|
||||
// Construct the DirEntry for Arikitcac article
|
||||
// NB : this must be done inside a test or asyncTest function, else the localZimArchive is not ready yet
|
||||
var arikitcacDirEntry = zimDirEntry.DirEntry.fromStringId(localZimArchive._file, "5856|7|A|0|2|A_Fool_for_You.html|A Fool for You|false|undefined");
|
||||
var aFoolForYouDirEntry = zimDirEntry.DirEntry.fromStringId(localZimArchive._file, "5856|7|A|0|2|A_Fool_for_You.html|A Fool for You|false|undefined");
|
||||
|
||||
expect(2);
|
||||
var callbackFunction = function(title, htmlArticle) {
|
||||
@ -510,7 +523,37 @@ define(['jquery', 'title', 'archive', 'zimArchive', 'zimDirEntry', 'util', 'geom
|
||||
ok(htmlArticle.match("^.*<h1[^>]*>A Fool for You</h1>"), "'A Fool for You' title somewhere in the article");
|
||||
start();
|
||||
};
|
||||
localZimArchive.readArticle(arikitcacDirEntry, callbackFunction);
|
||||
localZimArchive.readArticle(aFoolForYouDirEntry, callbackFunction);
|
||||
});
|
||||
asyncTest("check findTitlesWithPrefix 'A'", function() {
    expect(2);
    // Upper-case prefix: the search is limited to 5 results
    localZimArchive.findTitlesWithPrefix('A', 5, function(foundTitles) {
        ok(foundTitles && foundTitles.length === 5, "Article list with 5 results");
        equal(foundTitles[0].title , 'A Fool for You', 'First result should be "A Fool for You"');
        start();
    });
});
|
||||
asyncTest("check findTitlesWithPrefix 'a'", function() {
    expect(2);
    // Lower-case prefix: the same first result as for the upper-case prefix is expected
    localZimArchive.findTitlesWithPrefix('a', 5, function(foundTitles) {
        ok(foundTitles && foundTitles.length === 5, "Article list with 5 results");
        equal(foundTitles[0].title , 'A Fool for You', 'First result should be "A Fool for You"');
        start();
    });
});
|
||||
asyncTest("check findTitlesWithPrefix 'blues brothers'", function() {
    expect(2);
    // All-lower-case, multi-word prefix: the expected title capitalises every word
    localZimArchive.findTitlesWithPrefix('blues brothers', 5, function(foundTitles) {
        ok(foundTitles && foundTitles.length === 3, "Article list with 3 result");
        equal(foundTitles[0].title , 'Blues Brothers (film)', 'First result should be "Blues Brothers (film)"');
        start();
    });
});
|
||||
asyncTest("article '(The Night Time Is) The Right Time' correctly redirects to 'Night Time Is the Right Time'", function() {
|
||||
expect(6);
|
||||
|
@ -32,6 +32,93 @@ define(['q'], function(q) {
|
||||
return str.indexOf(suffix, str.length - suffix.length) !== -1;
|
||||
}
|
||||
|
||||
/**
 * Returns the same String with the first letter in upper-case.
 * The first letter is taken as a whole Unicode code point: when the String
 * starts with a UTF-16 surrogate pair, both code units are converted together
 * (a lone surrogate half has no upper-case form, so it would be left unchanged).
 * Empty, null or undefined values are returned as they are.
 * @param {String} string
 * @returns {String}
 */
function ucFirstLetter(string) {
    if (!string || !string.length) {
        return string;
    }
    // Number of UTF-16 code units used by the first letter (2 for a surrogate pair)
    var firstLetterLength = 1;
    var firstUnit = string.charCodeAt(0);
    if (firstUnit >= 0xD800 && firstUnit <= 0xDBFF && string.length >= 2) {
        var secondUnit = string.charCodeAt(1);
        if (secondUnit >= 0xDC00 && secondUnit <= 0xDFFF) {
            firstLetterLength = 2;
        }
    }
    return string.slice(0, firstLetterLength).toLocaleUpperCase() + string.slice(firstLetterLength);
}
|
||||
|
||||
/**
 * Returns the same String with the first letter in lower-case.
 * The first letter is taken as a whole Unicode code point: when the String
 * starts with a UTF-16 surrogate pair, both code units are converted together
 * (a lone surrogate half has no lower-case form, so it would be left unchanged).
 * Empty, null or undefined values are returned as they are.
 * @param {String} string
 * @returns {String}
 */
function lcFirstLetter(string) {
    if (!string || !string.length) {
        return string;
    }
    // Number of UTF-16 code units used by the first letter (2 for a surrogate pair)
    var firstLetterLength = 1;
    var firstUnit = string.charCodeAt(0);
    if (firstUnit >= 0xD800 && firstUnit <= 0xDBFF && string.length >= 2) {
        var secondUnit = string.charCodeAt(1);
        if (secondUnit >= 0xDC00 && secondUnit <= 0xDFFF) {
            firstLetterLength = 2;
        }
    }
    return string.slice(0, firstLetterLength).toLocaleLowerCase() + string.slice(firstLetterLength);
}
|
||||
|
||||
/**
 * Returns the same String with the first letter of every word in upper-case.
 * A word is a run of characters that are either ASCII word characters
 * (letters, digits, underscore) or characters that have distinct lower-case
 * and upper-case forms. Accented letters are therefore part of a word
 * instead of acting as word separators ("épée" gives "Épée", not "éPée").
 * Anything else (spaces, hyphens, apostrophes...) separates words.
 * Empty, null or undefined values are returned as they are.
 * @param {String} string
 * @returns {String}
 */
function ucEveryFirstLetter(string) {
    if (!string) {
        return string;
    }
    var result = '';
    var insideWord = false;
    for (var i = 0; i < string.length; i++) {
        var character = string.charAt(i);
        // A combining diacritical mark modifies the previous character:
        // copy it as is, without starting or ending a word
        if (character >= '\u0300' && character <= '\u036F') {
            result += character;
            continue;
        }
        var isWordCharacter = /\w/.test(character) || character.toLowerCase() !== character.toUpperCase();
        // Only the character that opens a word is converted
        result += (isWordCharacter && !insideWord) ? character.toLocaleUpperCase() : character;
        insideWord = isWordCharacter;
    }
    return result;
}
|
||||
|
||||
/**
 * Removes from an array of Titles every entry whose "title" property
 * duplicates the one of another entry.
 * The array is sorted by title and modified in place: the returned value
 * is the array given as parameter. For each group of equal titles, the entry
 * that comes first after sorting is the one that is kept.
 * A null or undefined value is returned as it is.
 *
 * @param {Array.<Title>} array of Titles
 * @returns {Array.<Title>} same array of Titles, sorted and without duplicates
 */
function removeDuplicateTitlesInArray(array) {
    if (!array) {
        return array;
    }
    array.sort(function(titleA, titleB) {
        if (titleA.title < titleB.title) return -1;
        if (titleA.title > titleB.title) return 1;
        return 0;
    });
    // Equal titles are now adjacent: compact the array in a single pass,
    // instead of calling splice (which shifts the whole tail) for each duplicate
    var uniqueCount = 0;
    for (var i = 0; i < array.length; i++) {
        if (uniqueCount === 0 || array[uniqueCount - 1].title !== array[i].title) {
            array[uniqueCount] = array[i];
            uniqueCount++;
        }
    }
    array.length = uniqueCount;
    return array;
}
|
||||
|
||||
/**
 * Generates an array of Strings, where all duplicates have been removed
 * (without changing the order: the first occurrence of each String is kept).
 * The array given as parameter is not modified: a new array is returned.
 * It is optimized for small arrays: each element is looked up with a linear
 * search in the result, which avoids building any index structure.
 * Source : http://codereview.stackexchange.com/questions/60128/removing-duplicates-from-an-array-quickly
 *
 * @param {Array.<String>} array of Strings
 * @returns {Array.<String>} new array of Strings, without duplicates
 *     (empty if the parameter is null or undefined)
 */
function removeDuplicateStringsInSmallArray(array) {
    var unique = [];
    if (!array) {
        return unique;
    }
    for (var i = 0; i < array.length; i++) {
        var current = array[i];
        if (unique.indexOf(current) < 0) {
            unique.push(current);
        }
    }
    return unique;
}
|
||||
|
||||
/**
|
||||
* Read an integer encoded in 4 bytes, little endian
|
||||
* @param {Array} byteArray
|
||||
@ -223,6 +310,11 @@ define(['q'], function(q) {
|
||||
*/
|
||||
return {
|
||||
endsWith: endsWith,
|
||||
ucFirstLetter: ucFirstLetter,
|
||||
lcFirstLetter: lcFirstLetter,
|
||||
ucEveryFirstLetter: ucEveryFirstLetter,
|
||||
removeDuplicateTitlesInArray: removeDuplicateTitlesInArray,
|
||||
removeDuplicateStringsInSmallArray: removeDuplicateStringsInSmallArray,
|
||||
readIntegerFrom4Bytes: readIntegerFrom4Bytes,
|
||||
readIntegerFrom2Bytes : readIntegerFrom2Bytes,
|
||||
readFloatFrom4Bytes : readFloatFrom4Bytes,
|
||||
|
@ -109,13 +109,43 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
|
||||
*/
|
||||
|
||||
/**
 * Search for the titles that start with the given prefix.
 * For now, ZIM titles are case sensitive. As a workaround, several case
 * variants of the prefix are searched one after the other, and their
 * results are concatenated, until resultSize titles have been collected
 * or every variant has been tried.
 * This should be enhanced when the ZIM format is modified to store normalized titles.
 * See https://phabricator.wikimedia.org/T108536
 *
 * @param {String} prefix
 * @param {Integer} resultSize
 * @param {callbackTitleList} callback
 */
ZIMArchive.prototype.findTitlesWithPrefix = function(prefix, resultSize, callback) {
    var archive = this;
    // Variants to try, in order: as typed, first letter upper-cased,
    // first letter lower-cased, first letter of every word upper-cased
    var remainingVariants = util.removeDuplicateStringsInSmallArray([
        prefix,
        util.ucFirstLetter(prefix),
        util.lcFirstLetter(prefix),
        util.ucEveryFirstLetter(prefix)
    ]);
    var foundTitles = [];

    (function tryNextVariant() {
        var finished = remainingVariants.length === 0 || foundTitles.length >= resultSize;
        if (finished) {
            callback(foundTitles);
            return;
        }
        var variant = remainingVariants.shift();
        // Only ask for the number of titles still missing
        archive.findTitlesWithPrefixCaseSensitive(variant, resultSize - foundTitles.length, function (variantTitles) {
            Array.prototype.push.apply(foundTitles, variantTitles);
            tryNextVariant();
        });
    })();
};
|
||||
|
||||
/**
|
||||
* Look for titles starting with the given prefix (case-sensitive)
|
||||
*
|
||||
* @param {String} prefix
|
||||
* @param {Integer} resultSize
|
||||
* @param {callbackTitleList} callback
|
||||
*/
|
||||
ZIMArchive.prototype.findTitlesWithPrefixCaseSensitive = function(prefix, resultSize, callback) {
|
||||
var that = this;
|
||||
util.binarySearch(0, this._file.articleCount, function(i) {
|
||||
return that._file.dirEntryByTitleIndex(i).then(function(dirEntry) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user