Fixes in the search algorithm

This commit is contained in:
mossroy 2013-05-10 15:18:34 +02:00
parent 12f2b7187b
commit 5457ab0f51
2 changed files with 44 additions and 29 deletions

View File

@ -37,21 +37,45 @@ define(function (require) {
equal(titleList.length, 4, "4 titles found, as requested"); equal(titleList.length, 4, "4 titles found, as requested");
var indexAbraham=-1; var indexAbraham=-1;
for (var i=0; i<titleList.length; i++) { for (var i=0; i<titleList.length; i++) {
if (titleList[i].name == "Abraham") { if (titleList[i] && titleList[i].name == "Abraham") {
indexAbraham=i; indexAbraham=i;
} }
} }
ok(indexAbraham>-1,"Title 'Abraham' found"); ok(indexAbraham>-1,"Title 'Abraham' found");
var firstTitleName = titleList[0].name; var firstTitleName = "not found";
var secondTitleName = titleList[1].name; var secondTitleName = "not found";
// TODO : fix the algorithm, so that these tests can work if (titleList.length>=1 && titleList[0]) {
//equal(firstTitleName,"Abbasid_Caliphate","First article name is 'Abbasid_Caliphate'"); firstTitleName = titleList[0].name;
//equal(secondTitleName,"Abortion","Second article name is 'Abortion'"); }
if (titleList.length>=2 && titleList[1]) {
secondTitleName = titleList[1].name;
}
equal(firstTitleName,"Abbasid_Caliphate","First article name is 'Abbasid_Caliphate'");
equal(secondTitleName,"Abortion","Second article name is 'Abortion'");
start(); start();
}; };
localArchive.getTitlesStartingAtOffset(0, 4, callbackFunction); localArchive.getTitlesStartingAtOffset(0, 4, callbackFunction);
}); });
/**
 * Async test: searches the archive for titles with the prefix "Am" and checks
 * the result handed to the callback.
 * The assertions expect 4 titles, the first two being 'Amazon_River' and
 * 'American_Civil_War', in that order.
 */
asyncTest("check findTitlesWithPrefix Am", function() {
    var callbackFunction = function(titleList) {
        // Fall back to an empty list: a null/undefined result must show up as
        // failed assertions, not as a TypeError thrown before start() is reached
        // (which would leave the async test waiting).
        var titles = titleList || [];
        ok(titles.length>0,"At least one title is found");
        // "not found" is a placeholder that makes the assertion messages readable
        // when an entry is missing.
        var firstTitleName = titles[0] ? titles[0].name : "not found";
        var secondTitleName = titles[1] ? titles[1].name : "not found";
        equal(firstTitleName,"Amazon_River","First article name is 'Amazon_River'");
        equal(secondTitleName,"American_Civil_War","Second article name is 'American_Civil_War'");
        equal(titles.length,4,"4 titles should be found");
        start();
    };
    localArchive.findTitlesWithPrefix("Am", callbackFunction);
});
//TODO check findTitlesWithPrefix //TODO check findTitlesWithPrefix
// Create a title instance for the Article 'Abraham' // Create a title instance for the Article 'Abraham'

View File

@ -129,15 +129,13 @@ define(function(require) {
} }
} }
if (newLineIndex == startIndex) { if (newLineIndex == startIndex) {
// Enf of file reached // End of file reached
hi = mid; hi = mid;
} }
else { else {
var normalizedTitle = remove_diacritics.normalizeString(utf8ByteArrayToString(byteArray,startIndex,newLineIndex)); var normalizedTitle = remove_diacritics.normalizeString(utf8ByteArrayToString(byteArray,startIndex,newLineIndex)).toLowerCase();
//alert("normalizedTitle = " + normalizedTitle + "lo = "+lo+" hi="+hi); if (normalizedTitle < normalizedPrefix) {
//alert("normalizedPrefix = " + normalizedPrefix); lo = mid + newLineIndex -1;
if (normalizedTitle.localeCompare(normalizedPrefix) < 0) {
lo = mid;
} }
else { else {
hi = mid; hi = mid;
@ -151,7 +149,7 @@ define(function(require) {
else { else {
if (lo > 0) { if (lo > 0) {
// Let lo point to the start of an entry // Let lo point to the start of an entry
lo++; lo++;lo++;
} }
// We found the closest title at index lo // We found the closest title at index lo
callbackFunction(lo); callbackFunction(lo);
@ -184,13 +182,9 @@ define(function(require) {
reader.onload = function(e) { reader.onload = function(e) {
var binaryTitleFile = e.target.result; var binaryTitleFile = e.target.result;
var byteArray = new Uint8Array(binaryTitleFile); var byteArray = new Uint8Array(binaryTitleFile);
// Look for the index of the next NewLine var i = 0;
var newLineIndex=0; var newLineIndex = 0;
while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) { var titleNumber = 0;
newLineIndex++;
}
var i = newLineIndex;
var titleNumber = -1;
var titleList = new Array(); var titleList = new Array();
while (i<byteArray.length && titleNumber<titleCount) { while (i<byteArray.length && titleNumber<titleCount) {
// Look for the index of the next NewLine // Look for the index of the next NewLine
@ -207,13 +201,7 @@ define(function(require) {
var title = Title.parseTitle(encodedTitle, currentLocalArchiveInstance, i); var title = Title.parseTitle(encodedTitle, currentLocalArchiveInstance, i);
// Skip the titles that do not start with the prefix titleList[titleNumber] = title;
// TODO use a normalizer to compare the strings
// TODO see why we need to skip the first title
//if (title && title.getReadableName().toLowerCase().indexOf(prefix.toLowerCase())==0) {
if (titleNumber>=0) {
titleList[titleNumber] = title;
}
titleNumber++; titleNumber++;
i=newLineIndex+1; i=newLineIndex+1;
} }
@ -237,7 +225,7 @@ define(function(require) {
alert('Title file read cancelled'); alert('Title file read cancelled');
}; };
var currentLocalArchiveInstance = this; var currentLocalArchiveInstance = this;
var normalizedTitleName = remove_diacritics.normalizeString(titleName); var normalizedTitleName = remove_diacritics.normalizeString(titleName).toLowerCase();
this.recursivePrefixSearch(reader, normalizedTitleName, 0, titleFileSize, function(titleOffset) { this.recursivePrefixSearch(reader, normalizedTitleName, 0, titleFileSize, function(titleOffset) {
currentLocalArchiveInstance.getTitleAtOffset(titleOffset, callbackFunction); currentLocalArchiveInstance.getTitleAtOffset(titleOffset, callbackFunction);
}); });
@ -268,8 +256,11 @@ define(function(require) {
alert('Title file read cancelled'); alert('Title file read cancelled');
}; };
var currentLocalArchiveInstance = this; var currentLocalArchiveInstance = this;
var normalizedPrefix = remove_diacritics.normalizeString(prefix).replace(" ","_"); var normalizedPrefix = remove_diacritics.normalizeString(prefix).replace(" ","_").toLowerCase();
this.recursivePrefixSearch(reader, normalizedPrefix, 0, titleFileSize, function(titleOffset) { this.recursivePrefixSearch(reader, normalizedPrefix, 0, titleFileSize, function(titleOffset) {
// TODO Skip the titles that do not start with the prefix
// TODO use a normalizer to compare the strings
//if (title && title.getReadableName().toLowerCase().indexOf(prefix.toLowerCase())==0) {
currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, 50, callbackFunction); currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, 50, callbackFunction);
}); });
}; };