mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-22 12:01:15 -04:00
Fix offset and length handling for reading an article from the dump
This commit is contained in:
parent
199e44756c
commit
afbfabac85
@ -10,27 +10,29 @@
|
||||
<body>
|
||||
<h1>Evopedia</h1>
|
||||
<br/>
|
||||
Blockstart : <input type="text" id="blockstart" value="0" />
|
||||
<br/>
|
||||
Blockoffset : <input type="text" id="blockoffset" value="0" />
|
||||
<br/>
|
||||
Length : <input type="text" id="length" value="8866" />
|
||||
<br/>
|
||||
<div id="openLocalFiles" style="visibility:hidden">
|
||||
Please pick the file title.idx from the wikipedia_small_2010-08-14 dump :<br/>
|
||||
Please pick the file titles.idx from the wikipedia_small_2010-08-14 dump :<br/>
|
||||
<input type="file" id="titleFile"/><br/>
|
||||
Please pick the file wikipedia_00.dat from the same dump :<br/>
|
||||
<input type="file" id="dataFile"/>
|
||||
</div>
|
||||
<br/>
|
||||
<input type="button" id="readTitle" value="Read title list from index" onclick="readAllTitlesFromIndex(titleFile)" />
|
||||
<input type="button" id="readData" value="Read article from dump" onclick="readArticleFromHtmlForm(dataFile)" />
|
||||
<br/>
|
||||
Choose a title : <select id="titleList" onchange="updateOffsetsFromTitle(this.value)"></select>
|
||||
<br/>
|
||||
Blockstart : <input type="text" id="blockstart" value="0" />
|
||||
<br/>
|
||||
Blockoffset : <input type="text" id="blockoffset" value="0" />
|
||||
<br/>
|
||||
Length : <input type="text" id="length" value="8866" />
|
||||
<br/>
|
||||
<input type="button" id="readData" value="Read article from dump" onclick="readArticleFromHtmlForm(dataFile)" />
|
||||
<br/>
|
||||
<div id="articleContent"> </div>
|
||||
<hr/>
|
||||
<pre id="rawArticleContent"> </pre>
|
||||
<textarea id="rawArticleContent" cols="80" rows="20"> </textarea>
|
||||
<!--<pre id="rawArticleContent"> </pre>-->
|
||||
|
||||
<script type="text/javascript" src="evopedia.js"></script>
|
||||
</body>
|
||||
|
@ -91,7 +91,16 @@ function readAllTitlesFromIndex(titleFile) {
|
||||
for (var j=i+15;j<newLineIndex;j++) {
|
||||
title += String.fromCharCode(byteArray[j]);
|
||||
}
|
||||
|
||||
// TODO : Read the title properly with UTF-8 encoding
|
||||
/*
|
||||
var buf = new ArrayBuffer();
|
||||
var bufView = new Uint16Array(buf);
|
||||
for (var j=0;j<newLineIndex-i-15;j++) {
|
||||
bufView[j]=byteArray[j+i+15];
|
||||
}
|
||||
title = String.fromCharCode(bufView);
|
||||
*/
|
||||
|
||||
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
||||
titleNumber++;
|
||||
i=newLineIndex-1;
|
||||
@ -135,13 +144,18 @@ function readArticleFromOffset(dataFile, blockstart, blockoffset, length) {
|
||||
// TODO : should be improved by uncompressing the content chunk by chunk,
|
||||
// until the length is reached, instead of uncompressing everything
|
||||
var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
|
||||
var htmlArticle = htmlArticles.substring(blockoffset,length);
|
||||
// Start reading at blockoffset and keep the next length characters of the uncompressed data
|
||||
var htmlArticle = htmlArticles.substring(blockoffset,blockoffset+length);
|
||||
|
||||
// Keep only length characters
|
||||
htmlArticle = htmlArticle.substring(0,length);
|
||||
// Decode UTF-8 encoding
|
||||
htmlArticle = decodeURIComponent(escape(htmlArticle));
|
||||
|
||||
document.getElementById('articleContent').innerHTML = htmlArticle;
|
||||
// For testing purpose
|
||||
document.getElementById('rawArticleContent').innerHTML = htmlArticle.replace(/&/g,'&').replace(/</g,'<').replace(/>/g,'>');
|
||||
//document.getElementById('rawArticleContent').innerHTML = htmlArticle.replace(/&/g,'&').replace(/</g,'<').replace(/>/g,'>');
|
||||
//document.getElementById('rawArticleContent').value = decodeURIComponent(escape(htmlArticles));
|
||||
};
|
||||
|
||||
//var blob = file;
|
||||
|
Loading…
x
Reference in New Issue
Block a user