Improvements in case variants for title search.

Based on the suggestions of peter-x in #160. The code is more readable and should be a bit faster.
2025-09-22 12:01:15 -04:00 · 2016-01-16 14:44:09 +01:00 · 2016-01-16 14:44:09 +01:00 · a1e581ff99
commit a1e581ff99
parent 8cf42f8140
3 changed files with 38 additions and 40 deletions
--- a/tests/tests.js
+++ b/tests/tests.js
@@ -381,7 +381,7 @@ define(['jquery', 'title', 'archive', 'zimArchive', 'zimDirEntry', 'util', 'geom
        test("check remove duplicates of an array of title objects", function() {
            var array = [{title:"a"}, {title:"b"}, {title:"c"}, {title:"a"}, {title:"c"}, {title:"d"}];
            var expectedArray = [{title:"a"}, {title:"b"}, {title:"c"}, {title:"d"}];
-            deepEqual(util.removeDuplicatesInArray(array), expectedArray, "Duplicates should be removed from the array");
+            deepEqual(util.removeDuplicateTitlesInArray(array), expectedArray, "Duplicates should be removed from the array");
        });
        
        module("evopedia_articles_nearby");
--- a/www/js/lib/util.js
+++ b/www/js/lib/util.js
@@ -38,12 +38,8 @@ define(['q'], function(q) {
     * @returns {String}
     */
    function ucFirstLetter(string) {
-        if (string) {
-            if (string.length >= 1) {
-                return string.charAt(0).toLocaleUpperCase() + string.slice(1);
-            } else {
-                return string;
-            }
+        if (string && string.length >= 1) {
+            return string[0].toLocaleUpperCase() + string.slice(1);
        } else {
            return string;
        }
@@ -104,6 +100,25 @@ define(['q'], function(q) {
        return array;
    }
    
+    /**
+     * Builds a new array of Strings from which all duplicates have been removed.
+     * The first occurrence of each String is kept, so the original order is preserved.
+     * Each element is checked with a linear indexOf scan, so this is meant for small arrays.
+     * Source : http://codereview.stackexchange.com/questions/60128/removing-duplicates-from-an-array-quickly
+     * 
+     * @param {Array.<String>} array of Strings; it is not modified
+     * @returns {Array.<String>} new array with the same Strings, without duplicates
+     */
+    function removeDuplicateStringsInSmallArray(array) {
+        var unique = [];
+        for (var i = 0; i < array.length; i++) {
+            var current = array[i];
+            if (unique.indexOf(current) < 0)
+                unique.push(current);
+        }
+        return unique;
+    }
+    
    /**
     * Read an integer encoded in 4 bytes, little endian
     * @param {Array} byteArray
@@ -298,7 +313,8 @@ define(['q'], function(q) {
        ucFirstLetter: ucFirstLetter,
        lcFirstLetter: lcFirstLetter,
        ucEveryFirstLetter: ucEveryFirstLetter,
-        removeDuplicatesInArray: removeDuplicateTitlesInArray,
+        removeDuplicateTitlesInArray: removeDuplicateTitlesInArray,
+        removeDuplicateStringsInSmallArray: removeDuplicateStringsInSmallArray,
        readIntegerFrom4Bytes: readIntegerFrom4Bytes,
        readIntegerFrom2Bytes : readIntegerFrom2Bytes,
        readFloatFrom4Bytes : readFloatFrom4Bytes,
--- a/www/js/lib/zimArchive.js
+++ b/www/js/lib/zimArchive.js
@@ -113,7 +113,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
     * For now, ZIM titles are case sensitive.
     * So, as workaround, we try several variants of the prefix to find more results.
     * This should be enhanced when the ZIM format will be modified to store normalized titles
-     * See https://github.com/mossroy/evopedia-html5/issues/117
+     * See https://phabricator.wikimedia.org/T108536
     * 
     * @param {String} prefix
     * @param {Integer} resultSize
@@ -121,39 +121,21 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
     */
    ZIMArchive.prototype.findTitlesWithPrefix = function(prefix, resultSize, callback) {
        var that = this;
-        that.findTitlesWithPrefixCaseSensitive(prefix, resultSize, function(titles) {
-            if (titles.length < resultSize) {
-                // Let's add results with first letter upper-case
-                var ucPrefix = util.ucFirstLetter(prefix);
-                that.findTitlesWithPrefixCaseSensitive(ucPrefix, resultSize, function(ucTitles) {
-                    titles.push.apply(titles, ucTitles);
-                    titles = util.removeDuplicatesInArray(titles);
-                    if (titles.length < resultSize) {
-                        // Let's add results with first letter lower-case
-                        var lcPrefix = util.ucFirstLetter(prefix);
-                        that.findTitlesWithPrefixCaseSensitive(lcPrefix, resultSize, function(lcTitles) {
-                            titles.push.apply(titles, lcTitles);
-                            titles = util.removeDuplicatesInArray(titles);
-                            if (titles.length < resultSize) {
-                                // Let's add results with first letter of every word upper-case
-                                var ucEveryWordPrefix = util.ucEveryFirstLetter(prefix);
-                                that.findTitlesWithPrefixCaseSensitive(ucEveryWordPrefix, resultSize, function (ucEveryTitles) {
-                                    titles.push.apply(titles, ucEveryTitles);
-                                    titles = util.removeDuplicatesInArray(titles);
-                                    callback(titles);
-                                });
-                            } else {
-                                callback(titles);
-                            }
-                        });
-                    } else {
-                        callback(titles);
-                    }
-                });
-            } else {
+        var prefixVariants = util.removeDuplicateStringsInSmallArray([prefix, util.ucFirstLetter(prefix), util.lcFirstLetter(prefix), util.ucEveryFirstLetter(prefix)]);
+        var titles = [];
+        function searchNextVariant() {
+            if (prefixVariants.length === 0 || titles.length >= resultSize) {
                callback(titles);
+                return;
            }
-        });
+            var prefix = prefixVariants[0];
+            prefixVariants = prefixVariants.slice(1);
+            that.findTitlesWithPrefixCaseSensitive(prefix, resultSize - titles.length, function (newTitles) {
+                titles.push.apply(titles, newTitles);
+                searchNextVariant();
+            });
+        }
+        searchNextVariant();
    };
    
    /**