From e141a769db7fb8c813d34e1a160a6851abb65619 Mon Sep 17 00:00:00 2001 From: mossroy Date: Sun, 4 Aug 2013 11:36:47 +0200 Subject: [PATCH] Fix the way we look for an article with a specific title name. Instead of taking the first result of the search on normalized names, we browse the results and take the one that exactly matches the title name we look for. Fixes #32 A few improvements also come in this commit, to externalize the maximum number of results displayed when searching, and to fix some bugs on the small dump --- js/app.js | 30 +++++++++++++++++++++--------- js/lib/archive.js | 39 ++++++++++++++++++++++++--------------- tests/tests.js | 35 +++++++++++++++++++++-------------- 3 files changed, 66 insertions(+), 38 deletions(-) diff --git a/js/app.js b/js/app.js index c5d07112..0e8530d0 100644 --- a/js/app.js +++ b/js/app.js @@ -10,6 +10,9 @@ define(function(require) { var evopediaTitle = require('title'); var evopediaArchive = require('archive'); var util = require('util'); + + // Maximum number of titles to display in a search + var MAX_SEARCH_RESULT_SIZE = 50; var localArchive = null; @@ -210,7 +213,7 @@ define(function(require) { $('#configuration').hide(); $('#articleContent').empty(); if (localArchive.titleFile) { - localArchive.findTitlesWithPrefix(prefix.trim(), populateListOfTitles); + localArchive.findTitlesWithPrefix(prefix.trim(), MAX_SEARCH_RESULT_SIZE, populateListOfTitles); } else { alert("Title file not set"); } @@ -284,15 +287,13 @@ define(function(require) { * @param {type} title */ function readArticle(title) { - if ($.isArray(title)) { - title = title[0]; - if (title.fileNr === 255) { - localArchive.resolveRedirect(title, readArticle); - return; - } + if (title.fileNr === 255) { + localArchive.resolveRedirect(title, readArticle); } + else { localArchive.readArticle(title, displayArticleInForm); } + } /** * Display the the given HTML article in the web page, @@ -314,6 +315,9 @@ define(function(require) { $('#articleContent').find('a').each(function() { // Store current link's url var url = $(this).attr("href"); + if (url === null || url === undefined) { + return; + } var lowerCaseUrl = url.toLowerCase(); var cssClass = $(this).attr("class"); @@ -396,8 +400,16 @@ define(function(require) { function goToArticle(titleName) { $("#articleName").html(titleName); $("#readingArticle").show(); - $("#articleContent").html(""); - localArchive.getTitleByName(titleName, readArticle); + localArchive.getTitleByName(titleName, function(title) { + if (title == null) { + $("#readingArticle").hide(); + alert("Article with title " + titleName + " not found in the archive"); + } + else { + $("#articleContent").html(""); + readArticle(title); + } + }); } }); diff --git a/js/lib/archive.js b/js/lib/archive.js index c2c3f848..ec82de8b 100644 --- a/js/lib/archive.js +++ b/js/lib/archive.js @@ -14,6 +14,9 @@ define(function(require) { // Size of chunks read in the dump files : 128 KB var CHUNK_SIZE = 131072; + // The maximum number of titles that can have the same name after normalizing + // This is used by the algorithm that searches for a specific article by its name + var MAX_TITLES_WITH_SAME_NORMALIZED_NAME = 30; /** * LocalArchive class : defines a wikipedia dump on the filesystem @@ -244,8 +247,8 @@ define(function(require) { /** * This function is recursively called after each asynchronous read, so that - * to find the closest index in titleFile to the given prefix When found, - * call the callbackFunction with the index + * to find the closest index in titleFile to the given prefix + * When found, call the callbackFunction with the index * * @param reader * @param normalizedPrefix @@ -307,15 +310,6 @@ define(function(require) { } }; - /** - * Look for a title in the title file at the given offset, and call the callbackFunction with this Title - * @param titleOffset - * @param callbackFunction - */ - LocalArchive.prototype.getTitleAtOffset = function(titleOffset, callbackFunction) { - this.getTitlesStartingAtOffset(titleOffset, 1, callbackFunction); - }; - /** * Read the titles in the title file starting at the given offset (maximum titleCount), and call the callbackFunction with this list of Title instances * @param titleOffset @@ -367,6 +361,7 @@ define(function(require) { /** * Look for a title by its name, and call the callbackFunction with this Title + * If the title is not found, the callbackFunction is called with parameter null * @param titleName * @param callbackFunction */ @@ -380,7 +375,20 @@ define(function(require) { var currentLocalArchiveInstance = this; var normalizedTitleName = normalize_string.normalizeString(titleName); this.recursivePrefixSearch(reader, normalizedTitleName, 0, titleFileSize, function(titleOffset) { - currentLocalArchiveInstance.getTitleAtOffset(titleOffset, callbackFunction); + currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, MAX_TITLES_WITH_SAME_NORMALIZED_NAME, function(titleList) { + if (titleList !== null && titleList.length>0) { + for (var i=0; i[ \t]$"), " at the end"); start(); }; - var callbackTitleList = function(titleList) { - ok(titleList && titleList.length == 1, "One title found"); - var title = titleList[0]; + var callbackTitleFound = function(title) { + ok(title !== null, "Title found"); equal(title.name, "AIDS", "Name of the title is correct"); localArchive.readArticle(title, callbackArticleRead); }; - localArchive.getTitleByName("AIDS", callbackTitleList); + localArchive.getTitleByName("AIDS", callbackTitleFound); + }); + + asyncTest("check getTitleByName with a title name that does not exist in the archive", function() { + var callbackTitleFound = function(title) { + ok(title === null, "No title found because it does not exist in the archive"); + start(); + }; + localArchive.getTitleByName("abcdef", callbackTitleFound); }); asyncTest("check loading a math image", function() {