Fix in the algorithm, by handling the escape bits of the Title file

This commit is contained in:
mossroy 2013-06-04 12:16:33 +02:00
parent 3f382a572a
commit 8b752aed89
2 changed files with 36 additions and 19 deletions

View File

@ -14,6 +14,13 @@ define(function(require) {
return byteArray[firstIndex] + byteArray[firstIndex+1]*256 + byteArray[firstIndex+2]*65536 + byteArray[firstIndex+3]*16777216;
}
/**
 * Decode a 2-byte little-endian integer from a byte array.
 * The byte at firstIndex is the low-order byte; the byte right after it
 * is the high-order byte (weighted by 256).
 * @param {Array.<number>|Uint8Array} byteArray Indexable collection of byte values (assumed to be in 0-255 -- not checked here).
 * @param {number} firstIndex Index of the low-order byte.
 * @return {number} Low byte + 256 * high byte.
 */
function readIntegerFrom2Bytes(byteArray,firstIndex) {
    var lowByte = byteArray[firstIndex];
    var highByte = byteArray[firstIndex+1];
    return lowByte + highByte*256;
}
/**
* Converts a UTF-8 byte array to JavaScript's 16-bit Unicode.
* @param {Array.<number>} bytes UTF-8 byte array.
@ -505,28 +512,22 @@ define(function(require) {
} else {
t.titleEntryLength = encodedTitle.length + 1;
}
// TODO : handle escapes
/*
int escapes = LittleEndianReader.readUInt16(encodedTitle, 0);
byte[] positionData = new byte[13];
System.arraycopy(encodedTitle, 2, positionData, 0, 13);
var escapedEncodedTitle = new Uint8Array(encodedTitle);
var escapes = readIntegerFrom2Bytes(encodedTitle, 0);
if ((escapes & (1 << 14)) != 0)
escapes |= '\n';
for (int i = 0; i < 13; i ++) {
for (var i = 0; i < 13; i ++) {
if ((escapes & (1 << i)) != 0)
positionData[i] = '\n';
escapedEncodedTitle[i+2] = 10; // Corresponds to \n
}
*/
t.fileNr = encodedTitle[2];
t.blockStart = readIntegerFrom4Bytes(encodedTitle, 3);
t.blockOffset = readIntegerFrom4Bytes(encodedTitle, 7);
t.articleLength = readIntegerFrom4Bytes(encodedTitle, 11);
t.fileNr = escapedEncodedTitle[2];
t.blockStart = readIntegerFrom4Bytes(escapedEncodedTitle, 3);
t.blockOffset = readIntegerFrom4Bytes(escapedEncodedTitle, 7);
t.articleLength = readIntegerFrom4Bytes(escapedEncodedTitle, 11);
t.name = Title.parseNameOnly(encodedTitle);
t.name = Title.parseNameOnly(escapedEncodedTitle);
return t;
};

View File

@ -85,9 +85,7 @@ define(function (require) {
titleAbraham.fileNr = 0;
titleAbraham.name = "Abraham";
titleAbraham.titleOffset = 57;
// TODO check parseTitle for Abraham, and for another one with escape characters
asyncTest("check getTitleByName with accents : Diego Velázquez", function() {
var callbackFunction = function(titleList) {
ok (titleList && titleList.length==1,"One title found");
@ -123,5 +121,23 @@ define(function (require) {
};
localArchive.readArticle(titleAbraham, callbackFunction);
});
asyncTest("check getTitleByName and readArticle with escape bytes", function(){
    // Step 2: invoked by readArticle with the article content; checks the HTML.
    function onArticleRead(title, htmlArticle) {
        ok(htmlArticle && htmlArticle.length>0,"Article not empty");
        // Turn every CR/LF into a space, so that line breaks at the start or
        // the end of the article count as blanks for the patterns below
        var flattenedHtml = htmlArticle.replace(/[\r\n]/g, " ");
        ok(flattenedHtml.match("^[ \t]*<h1[^>]*>AIDS</h1>"),"'AIDS' title at the beginning");
        // NOTE(review): this pattern requires exactly one space or tab after
        // the closing </div> -- confirm that is intended
        ok(flattenedHtml.match("</div>[ \t]$"),"</div> at the end");
        // Last assertion done (start() presumably resumes the test runner)
        start();
    }
    // Step 1: invoked by getTitleByName with the matching titles; checks the
    // single result and then requests the corresponding article.
    function onTitlesFound(titleList) {
        ok(titleList && titleList.length==1,"One title found");
        var foundTitle = titleList[0];
        equal(foundTitle.name,"AIDS","Name of the title is correct");
        localArchive.readArticle(foundTitle, onArticleRead);
    }
    localArchive.getTitleByName("AIDS",onTitlesFound);
});
};
});