/**
 * Decodes a range of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Adapted from goog.crypt.utf8ByteArrayToString in the Closure Library
 * (Apache License 2.0), extended with a start/end window and with support
 * for 4-byte sequences.
 *
 * @param {Array.<number>|Uint8Array} bytes UTF-8 encoded bytes.
 * @param {number=} startIndex Index of the first byte to decode
 *     (defaults to 0).
 * @param {number=} endIndex Index just past the last lead byte to decode
 *     (defaults to bytes.length). A multi-byte character whose lead byte is
 *     before endIndex is still decoded completely, even if its continuation
 *     bytes lie at or after endIndex.
 * @return {string} The decoded string.
 */
function utf8ByteArrayToString(bytes, startIndex, endIndex) {
    var pos = (startIndex === undefined) ? 0 : startIndex;
    var end = (endIndex === undefined) ? bytes.length : endIndex;
    var out = [];
    var c = 0;
    var c1, c2, c3, c4, codePoint;

    while (pos < bytes.length && pos < end) {
        c1 = bytes[pos++];
        if (c1 < 128) {
            // 1 byte: 0xxxxxxx (ASCII).
            out[c++] = String.fromCharCode(c1);
        } else if (c1 > 191 && c1 < 224) {
            // 2 bytes: 110xxxxx 10xxxxxx.
            c2 = bytes[pos++];
            out[c++] = String.fromCharCode((c1 & 31) << 6 | c2 & 63);
        } else if (c1 > 239 && c1 < 248) {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            // The code point is above U+FFFF, so it does not fit in a single
            // UTF-16 code unit and has to be emitted as a surrogate pair.
            c2 = bytes[pos++];
            c3 = bytes[pos++];
            c4 = bytes[pos++];
            codePoint = ((c1 & 7) << 18 | (c2 & 63) << 12 |
                (c3 & 63) << 6 | c4 & 63) - 0x10000;
            out[c++] = String.fromCharCode(0xD800 + (codePoint >> 10));
            out[c++] = String.fromCharCode(0xDC00 + (codePoint & 1023));
        } else {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx.
            // Any other (invalid) lead byte also lands here, as in the
            // original implementation.
            c2 = bytes[pos++];
            c3 = bytes[pos++];
            out[c++] = String.fromCharCode(
                (c1 & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
        }
    }
    return out.join('');
}