diff --git a/tests/tests.js b/tests/tests.js index 91bfe399..211e1272 100644 --- a/tests/tests.js +++ b/tests/tests.js @@ -381,7 +381,7 @@ define(['jquery', 'title', 'archive', 'zimArchive', 'zimDirEntry', 'util', 'geom test("check remove duplicates of an array of title objects", function() { var array = [{title:"a"}, {title:"b"}, {title:"c"}, {title:"a"}, {title:"c"}, {title:"d"}]; var expectedArray = [{title:"a"}, {title:"b"}, {title:"c"}, {title:"d"}]; - deepEqual(util.removeDuplicatesInArray(array), expectedArray, "Duplicates should be removed from the array"); + deepEqual(util.removeDuplicateTitlesInArray(array), expectedArray, "Duplicates should be removed from the array"); }); module("evopedia_articles_nearby"); diff --git a/www/js/lib/util.js b/www/js/lib/util.js index aa2d41ef..6fc07743 100644 --- a/www/js/lib/util.js +++ b/www/js/lib/util.js @@ -38,12 +38,8 @@ define(['q'], function(q) { * @returns {String} */ function ucFirstLetter(string) { - if (string) { - if (string.length >= 1) { - return string.charAt(0).toLocaleUpperCase() + string.slice(1); - } else { - return string; - } + if (string && string.length >= 1) { + return string[0].toLocaleUpperCase() + string.slice(1); } else { return string; } @@ -104,6 +100,25 @@ define(['q'], function(q) { return array; } + /** + * Generates an array of Strings, where all duplicates have been removed + * (without changing the order) + * It is optimized for small arrays. 
+ * Source : http://codereview.stackexchange.com/questions/60128/removing-duplicates-from-an-array-quickly + * + * @param {Array.<String>} array of String + * @returns {Array.<String>} new array containing the same Strings, without duplicates + */ + function removeDuplicateStringsInSmallArray(array) { + var unique = []; + for (var i = 0; i < array.length; i++) { + var current = array[i]; + if (unique.indexOf(current) < 0) + unique.push(current); + } + return unique; + } + /** * Read an integer encoded in 4 bytes, little endian * @param {Array} byteArray @@ -298,7 +313,8 @@ define(['q'], function(q) { ucFirstLetter: ucFirstLetter, lcFirstLetter: lcFirstLetter, ucEveryFirstLetter: ucEveryFirstLetter, - removeDuplicatesInArray: removeDuplicateTitlesInArray, + removeDuplicateTitlesInArray: removeDuplicateTitlesInArray, + removeDuplicateStringsInSmallArray: removeDuplicateStringsInSmallArray, readIntegerFrom4Bytes: readIntegerFrom4Bytes, readIntegerFrom2Bytes : readIntegerFrom2Bytes, readFloatFrom4Bytes : readFloatFrom4Bytes, diff --git a/www/js/lib/zimArchive.js b/www/js/lib/zimArchive.js index fab1c788..87094fd1 100644 --- a/www/js/lib/zimArchive.js +++ b/www/js/lib/zimArchive.js @@ -113,7 +113,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], * For now, ZIM titles are case sensitive. * So, as workaround, we try several variants of the prefix to find more results. 
* This should be enhanced when the ZIM format will be modified to store normalized titles - * See https://github.com/mossroy/evopedia-html5/issues/117 + * See https://phabricator.wikimedia.org/T108536 * * @param {String} prefix * @param {Integer} resultSize @@ -121,39 +121,21 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], */ ZIMArchive.prototype.findTitlesWithPrefix = function(prefix, resultSize, callback) { var that = this; - that.findTitlesWithPrefixCaseSensitive(prefix, resultSize, function(titles) { - if (titles.length < resultSize) { - // Let's add results with first letter upper-case - var ucPrefix = util.ucFirstLetter(prefix); - that.findTitlesWithPrefixCaseSensitive(ucPrefix, resultSize, function(ucTitles) { - titles.push.apply(titles, ucTitles); - titles = util.removeDuplicatesInArray(titles); - if (titles.length < resultSize) { - // Let's add results with first letter lower-case - var lcPrefix = util.ucFirstLetter(prefix); - that.findTitlesWithPrefixCaseSensitive(lcPrefix, resultSize, function(lcTitles) { - titles.push.apply(titles, lcTitles); - titles = util.removeDuplicatesInArray(titles); - if (titles.length < resultSize) { - // Let's add results with first letter of every word upper-case - var ucEveryWordPrefix = util.ucEveryFirstLetter(prefix); - that.findTitlesWithPrefixCaseSensitive(ucEveryWordPrefix, resultSize, function (ucEveryTitles) { - titles.push.apply(titles, ucEveryTitles); - titles = util.removeDuplicatesInArray(titles); - callback(titles); - }); - } else { - callback(titles); - } - }); - } else { - callback(titles); - } - }); - } else { + var prefixVariants = util.removeDuplicateStringsInSmallArray([prefix, util.ucFirstLetter(prefix), util.lcFirstLetter(prefix), util.ucEveryFirstLetter(prefix)]); + var titles = []; + function searchNextVariant() { + if (prefixVariants.length === 0 || titles.length >= resultSize) { callback(titles); + return; } - }); + var prefix = prefixVariants[0]; + prefixVariants = prefixVariants.slice(1); + 
that.findTitlesWithPrefixCaseSensitive(prefix, resultSize - titles.length, function (newTitles) { + titles.push.apply(titles, newTitles); + searchNextVariant(); + }); + } + searchNextVariant(); }; /**