mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-23 04:28:30 -04:00
Fix offset and length handling for reading an article from the dump
This commit is contained in:
parent
199e44756c
commit
afbfabac85
@ -10,27 +10,29 @@
|
|||||||
<body>
|
<body>
|
||||||
<h1>Evopedia</h1>
|
<h1>Evopedia</h1>
|
||||||
<br/>
|
<br/>
|
||||||
Blockstart : <input type="text" id="blockstart" value="0" />
|
|
||||||
<br/>
|
|
||||||
Blockoffset : <input type="text" id="blockoffset" value="0" />
|
|
||||||
<br/>
|
|
||||||
Length : <input type="text" id="length" value="8866" />
|
|
||||||
<br/>
|
|
||||||
<div id="openLocalFiles" style="visibility:hidden">
|
<div id="openLocalFiles" style="visibility:hidden">
|
||||||
Please pick the file title.idx from the wikipedia_small_2010-08-14 dump :<br/>
|
Please pick the file titles.idx from the wikipedia_small_2010-08-14 dump :<br/>
|
||||||
<input type="file" id="titleFile"/><br/>
|
<input type="file" id="titleFile"/><br/>
|
||||||
Please pick the file wikipedia_00.dat from the same dump :<br/>
|
Please pick the file wikipedia_00.dat from the same dump :<br/>
|
||||||
<input type="file" id="dataFile"/>
|
<input type="file" id="dataFile"/>
|
||||||
</div>
|
</div>
|
||||||
<br/>
|
<br/>
|
||||||
<input type="button" id="readTitle" value="Read title list from index" onclick="readAllTitlesFromIndex(titleFile)" />
|
<input type="button" id="readTitle" value="Read title list from index" onclick="readAllTitlesFromIndex(titleFile)" />
|
||||||
<input type="button" id="readData" value="Read article from dump" onclick="readArticleFromHtmlForm(dataFile)" />
|
|
||||||
<br/>
|
<br/>
|
||||||
Choose a title : <select id="titleList" onchange="updateOffsetsFromTitle(this.value)"></select>
|
Choose a title : <select id="titleList" onchange="updateOffsetsFromTitle(this.value)"></select>
|
||||||
<br/>
|
<br/>
|
||||||
|
Blockstart : <input type="text" id="blockstart" value="0" />
|
||||||
|
<br/>
|
||||||
|
Blockoffset : <input type="text" id="blockoffset" value="0" />
|
||||||
|
<br/>
|
||||||
|
Length : <input type="text" id="length" value="8866" />
|
||||||
|
<br/>
|
||||||
|
<input type="button" id="readData" value="Read article from dump" onclick="readArticleFromHtmlForm(dataFile)" />
|
||||||
|
<br/>
|
||||||
<div id="articleContent"> </div>
|
<div id="articleContent"> </div>
|
||||||
<hr/>
|
<hr/>
|
||||||
<pre id="rawArticleContent"> </pre>
|
<textarea id="rawArticleContent" cols="80" rows="20"> </textarea>
|
||||||
|
<!--<pre id="rawArticleContent"> </pre>-->
|
||||||
|
|
||||||
<script type="text/javascript" src="evopedia.js"></script>
|
<script type="text/javascript" src="evopedia.js"></script>
|
||||||
</body>
|
</body>
|
||||||
|
@ -91,6 +91,15 @@ function readAllTitlesFromIndex(titleFile) {
|
|||||||
for (var j=i+15;j<newLineIndex;j++) {
|
for (var j=i+15;j<newLineIndex;j++) {
|
||||||
title += String.fromCharCode(byteArray[j]);
|
title += String.fromCharCode(byteArray[j]);
|
||||||
}
|
}
|
||||||
|
// TODO : Read the title properly with UTF-8 encoding
|
||||||
|
/*
|
||||||
|
var buf = new ArrayBuffer();
|
||||||
|
var bufView = new Uint16Array(buf);
|
||||||
|
for (var j=0;j<newLineIndex-i-15;j++) {
|
||||||
|
bufView[j]=byteArray[j+i+15];
|
||||||
|
}
|
||||||
|
title = String.fromCharCode(bufView);
|
||||||
|
*/
|
||||||
|
|
||||||
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
||||||
titleNumber++;
|
titleNumber++;
|
||||||
@ -135,13 +144,18 @@ function readArticleFromOffset(dataFile, blockstart, blockoffset, length) {
|
|||||||
// TODO : should be improved by uncompressing the content chunk by chunk,
|
// TODO : should be improved by uncompressing the content chunk by chunk,
|
||||||
// until the length is reached, instead of uncompressing everything
|
// until the length is reached, instead of uncompressing everything
|
||||||
var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
|
var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
|
||||||
var htmlArticle = htmlArticles.substring(blockoffset,length);
|
// Start reading at blockoffset and keep the next length units of the decompressed string (it is UTF-8-decoded only afterwards)
|
||||||
|
var htmlArticle = htmlArticles.substring(blockoffset,blockoffset+length);
|
||||||
|
|
||||||
|
// Keep only length characters
|
||||||
|
htmlArticle = htmlArticle.substring(0,length);
|
||||||
// Decode UTF-8 encoding
|
// Decode UTF-8 encoding
|
||||||
htmlArticle = decodeURIComponent(escape(htmlArticle));
|
htmlArticle = decodeURIComponent(escape(htmlArticle));
|
||||||
|
|
||||||
document.getElementById('articleContent').innerHTML = htmlArticle;
|
document.getElementById('articleContent').innerHTML = htmlArticle;
|
||||||
// For testing purpose
|
// For testing purpose
|
||||||
document.getElementById('rawArticleContent').innerHTML = htmlArticle.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
|
//document.getElementById('rawArticleContent').innerHTML = htmlArticle.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
|
||||||
|
//document.getElementById('rawArticleContent').value = decodeURIComponent(escape(htmlArticles));
|
||||||
};
|
};
|
||||||
|
|
||||||
//var blob = file;
|
//var blob = file;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user