mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-24 04:54:51 -04:00
Fix UTF-8 encoding of article titles
This commit is contained in:
parent
68a2f6a5f3
commit
2701f47a87
@ -43,6 +43,31 @@ function readIntegerFrom4Bytes(byteArray,firstIndex) {
|
|||||||
return byteArray[firstIndex] + byteArray[firstIndex+1]*256 + byteArray[firstIndex+2]*65536 + byteArray[firstIndex+3]*16777216;
|
return byteArray[firstIndex] + byteArray[firstIndex+1]*256 + byteArray[firstIndex+2]*65536 + byteArray[firstIndex+3]*16777216;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts a range of a UTF-8 byte array to JavaScript's 16-bit Unicode.
 *
 * Decodes 1-, 2-, 3- and 4-byte UTF-8 sequences. Code points above U+FFFF
 * (4-byte sequences) are emitted as a UTF-16 surrogate pair, because a
 * JavaScript string stores 16-bit code units.
 *
 * @param {Array.<number>} bytes UTF-8 byte array (or typed array of bytes).
 * @param {number=} startIndex Index of the first byte to decode (inclusive).
 *     Defaults to 0 when omitted.
 * @param {number=} endIndex Index at which decoding stops (exclusive).
 *     Defaults to bytes.length when omitted.
 * @return {string} 16-bit Unicode string.
 * Adapted from http://closure-library.googlecode.com/svn/docs/closure_goog_crypt.js.source.html (Apache License 2.0)
 */
function utf8ByteArrayToString(bytes, startIndex, endIndex) {
    var out = [];
    var c = 0;
    var pos = startIndex === undefined ? 0 : startIndex;
    // Never start a sequence at or beyond the end of the array.
    var end = endIndex === undefined ? bytes.length : Math.min(endIndex, bytes.length);
    var c1, c2, c3, c4, u;

    while (pos < end) {
        c1 = bytes[pos++];
        if (c1 < 128) {
            // Single byte: plain ASCII.
            out[c++] = String.fromCharCode(c1);
        } else if (c1 > 191 && c1 < 224) {
            // Two-byte sequence: 110xxxxx 10xxxxxx.
            c2 = bytes[pos++];
            out[c++] = String.fromCharCode((c1 & 31) << 6 | c2 & 63);
        } else if (c1 > 239 && c1 < 248) {
            // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            // The resulting code point is above U+FFFF, so it has to be
            // split into a high and a low surrogate.
            c2 = bytes[pos++];
            c3 = bytes[pos++];
            c4 = bytes[pos++];
            u = ((c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) - 0x10000;
            out[c++] = String.fromCharCode(0xD800 + (u >> 10));
            out[c++] = String.fromCharCode(0xDC00 + (u & 1023));
        } else {
            // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
            // Any other lead byte is also handled here, as in the original code.
            c2 = bytes[pos++];
            c3 = bytes[pos++];
            out[c++] = String.fromCharCode(
                (c1 & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
        }
    }
    return out.join('');
}
|
||||||
|
|
||||||
function readAllTitlesFromIndex(titleFile) {
|
function readAllTitlesFromIndex(titleFile) {
|
||||||
if (titleFile) {
|
if (titleFile) {
|
||||||
var reader = new FileReader();
|
var reader = new FileReader();
|
||||||
@ -88,18 +113,13 @@ function readAllTitlesFromIndex(titleFile) {
|
|||||||
while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=128) {
|
while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=128) {
|
||||||
newLineIndex++;
|
newLineIndex++;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
for (var j=i+15;j<newLineIndex;j++) {
|
for (var j=i+15;j<newLineIndex;j++) {
|
||||||
title += String.fromCharCode(byteArray[j]);
|
title += String.fromCharCode(byteArray[j]);
|
||||||
}
|
}
|
||||||
// TODO : Read the title properly with UTF-8 encoding
|
|
||||||
/*
|
|
||||||
var buf = new ArrayBuffer();
|
|
||||||
var bufView = new Uint16Array(buf);
|
|
||||||
for (var j=0;j<newLineIndex-i-15;j++) {
|
|
||||||
bufView[j]=byteArray[j+i+15];
|
|
||||||
}
|
|
||||||
title = String.fromCharCode(bufView);
|
|
||||||
*/
|
*/
|
||||||
|
// TODO : Read the title properly with UTF-8 encoding
|
||||||
|
title = utf8ByteArrayToString(byteArray,i+15,newLineIndex);
|
||||||
|
|
||||||
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
||||||
titleNumber++;
|
titleNumber++;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user