mirror of
https://github.com/kiwix/kiwix-js.git
synced 2025-09-24 04:54:51 -04:00
First usable version of article searching
Removed the possibility to list all articles (much too slow). Added the removal of diacritics (accents, etc.) from the user-typed prefix.
This commit is contained in:
parent
c598de1ac5
commit
638d2f8815
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
evopedia-html5/WebContent/evopedia.html~
|
||||
evopedia-html5/WebContent/evopedia.js~
|
||||
evopedia-html5/WebContent/remove_diacritics.js~
|
||||
|
@ -31,6 +31,7 @@ License:
|
||||
|
||||
<title>Evopedia HTML5</title>
|
||||
<script type="text/javascript" src="bzip2-antimatter15.js"></script>
|
||||
<script type="text/javascript" src="remove_diacritics.js"></script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
@ -43,11 +44,9 @@ Please pick the files wikipedia_*.dat from the same dump :<br/>
|
||||
<input type="file" id="dataFiles" multiple="true"/>
|
||||
</div>
|
||||
<br/>
|
||||
Find a title : <input type="text" id="prefix" value="" /> <input type="button" id="searchTitles" value="Search titles" onclick="searchTitlesFromPrefix(titleFile,document.getElementById('prefix').value)" />
|
||||
Find titles from the prefix : <input type="text" id="prefix" value="" onkeyup="onKeyUpPrefix(event)" /> <input type="button" id="searchTitles" value="Search titles" onclick="searchTitlesFromPrefix(titleFile,document.getElementById('prefix').value)" />
|
||||
<br/>
|
||||
<input type="button" id="readTitle" value="Read all title list from index" onclick="readAllTitlesFromIndex(titleFile)" />
|
||||
<br/>
|
||||
Choose a title : <select id="titleList" onchange="updateOffsetsFromTitle(this.value)"></select>
|
||||
Choose a title from the filtered list : <select id="titleList" onchange="updateOffsetsFromTitle(this.value)"></select>
|
||||
<br/>
|
||||
File number : <input type="text" id="filenumber" value="0" />
|
||||
<br/>
|
||||
@ -62,7 +61,7 @@ Length : <input type="text" id="length" value="8866" />
|
||||
<!-- TODO : add CSS styles -->
|
||||
<div id="articleContent"> </div>
|
||||
<hr/>
|
||||
<textarea id="rawArticleContent" cols="80" rows="20"> </textarea>
|
||||
<textarea id="debugZone" cols="80" rows="20"> </textarea>
|
||||
|
||||
<script type="text/javascript" src="evopedia.js"></script>
|
||||
</body>
|
||||
|
@ -131,6 +131,8 @@ function recursivePrefixSearch(titleFile, reader, prefix, lo, hi) {
|
||||
reader.readAsArrayBuffer(blob);
|
||||
}
|
||||
else {
|
||||
// We found the closest title
|
||||
//alert ("Found the closest title near index "+lo);
|
||||
readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,lo);
|
||||
}
|
||||
}
|
||||
@ -142,7 +144,7 @@ function recursivePrefixSearch(titleFile, reader, prefix, lo, hi) {
|
||||
function searchTitlesFromPrefix(titleFile, prefix) {
|
||||
if (titleFile) {
|
||||
var titleFileSize = titleFile.size;
|
||||
// TODO : normalize the prefix (remove accents etc)
|
||||
prefix = normalizeString(prefix);
|
||||
|
||||
var reader = new FileReader();
|
||||
reader.onerror = errorHandler;
|
||||
@ -156,77 +158,73 @@ function searchTitlesFromPrefix(titleFile, prefix) {
|
||||
}
|
||||
}
|
||||
|
||||
function readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,index) {
|
||||
// We found the closest title
|
||||
alert ("Found the closest title at index "+index);
|
||||
// TODO : read the following titles, stopping when the title does not start with the prefix any more (or at a maximum number of titles)
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all the titles from the index file, and populate the dropdown list
|
||||
* Warning : only usable on very small dumps. It is much too long on normal dumps
|
||||
* Read the titles following the given index in the title file, until one of the following conditions is reached :
|
||||
* - the title does not start with the prefix anymore
|
||||
* - we already read the maximum number of titles
|
||||
* and populate the dropdown list
|
||||
*/
|
||||
function readAllTitlesFromIndex(titleFile) {
|
||||
if (titleFile) {
|
||||
var reader = new FileReader();
|
||||
reader.onerror = errorHandler;
|
||||
reader.onabort = function(e) {
|
||||
alert('Title file read cancelled');
|
||||
};
|
||||
reader.onload = function(e) {
|
||||
var binaryTitleFile = e.target.result;
|
||||
var byteArray = new Uint8Array(binaryTitleFile);
|
||||
function readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,startIndex) {
|
||||
var reader = new FileReader();
|
||||
reader.onerror = errorHandler;
|
||||
reader.onabort = function(e) {
|
||||
alert('Title file read cancelled');
|
||||
};
|
||||
reader.onload = function(e) {
|
||||
var binaryTitleFile = e.target.result;
|
||||
var byteArray = new Uint8Array(binaryTitleFile);
|
||||
// Look for the index of the next NewLine
|
||||
var newLineIndex=0;
|
||||
while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=128) {
|
||||
newLineIndex++;
|
||||
}
|
||||
var i = newLineIndex;
|
||||
var titleNumber=-1;
|
||||
var comboTitleList = document.getElementById('titleList');
|
||||
while (i<byteArray.length && titleNumber<50) {
|
||||
var filenumber = 0;
|
||||
var blockstart = 0;
|
||||
var blockoffset = 0;
|
||||
var length = 0;
|
||||
var title = "";
|
||||
|
||||
// TODO : interpret escape area
|
||||
var escape1 = byteArray[i];
|
||||
var escape2 = byteArray[i+1];
|
||||
filenumber = byteArray[i+2];
|
||||
|
||||
var i = 0;
|
||||
var titleNumber=0;
|
||||
var comboTitleList = document.getElementById('titleList');
|
||||
blockstart = readIntegerFrom4Bytes(byteArray,i+3);
|
||||
blockoffset = readIntegerFrom4Bytes(byteArray,i+7);
|
||||
length = readIntegerFrom4Bytes(byteArray,i+11);
|
||||
var newLineIndex = i+15;
|
||||
|
||||
while (i<byteArray.length) {
|
||||
var filenumber = 0;
|
||||
var blockstart = 0;
|
||||
var blockoffset = 0;
|
||||
var length = 0;
|
||||
var title = "";
|
||||
|
||||
// TODO : interpret escape area
|
||||
var escape1 = byteArray[i];
|
||||
var escape2 = byteArray[i+1];
|
||||
filenumber = byteArray[i+2];
|
||||
|
||||
blockstart = readIntegerFrom4Bytes(byteArray,i+3);
|
||||
blockoffset = readIntegerFrom4Bytes(byteArray,i+7);
|
||||
length = readIntegerFrom4Bytes(byteArray,i+11);
|
||||
var newLineIndex = i+15;
|
||||
|
||||
// Look for the index of NewLine
|
||||
while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=128) {
|
||||
newLineIndex++;
|
||||
}
|
||||
|
||||
title = utf8ByteArrayToString(byteArray,i+15,newLineIndex);
|
||||
|
||||
if (title) {
|
||||
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
||||
}
|
||||
titleNumber++;
|
||||
i=newLineIndex-1;
|
||||
// Look for the index of the next NewLine
|
||||
while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=128) {
|
||||
newLineIndex++;
|
||||
}
|
||||
};
|
||||
|
||||
var blob = titleFile;
|
||||
|
||||
// Read in the file as a binary string
|
||||
reader.readAsArrayBuffer(blob);
|
||||
}
|
||||
else {
|
||||
alert('Title file not set');
|
||||
}
|
||||
title = utf8ByteArrayToString(byteArray,i+15,newLineIndex);
|
||||
// Skip the first title
|
||||
if (titleNumber>=0 && title) {
|
||||
// TODO : check if the title starts with prefix, and return if it does not
|
||||
comboTitleList.options[titleNumber] = new Option (title, filenumber+"|"+blockstart+"|"+blockoffset+"|"+length);
|
||||
}
|
||||
titleNumber++;
|
||||
i=newLineIndex-1;
|
||||
}
|
||||
// Run onchange on the combo, in order to read the value of the selected item (the first one)
|
||||
comboTitleList.onchange();
|
||||
};
|
||||
var blob = titleFile.slice(startIndex);
|
||||
// Read the sliced part of the file as an ArrayBuffer
|
||||
reader.readAsArrayBuffer(blob);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decompress and read an article in dump files
|
||||
*/
|
||||
function readArticleFromHtmlForm(dataFiles) {
|
||||
document.getElementById("articleContent").innerHTML="Loading article from dump...";
|
||||
if (dataFiles && dataFiles.length>0) {
|
||||
var filenumber = document.getElementById('filenumber').value;
|
||||
var blockstart = document.getElementById('blockstart').value;
|
||||
@ -289,7 +287,7 @@ function readArticleFromOffset(dataFile, blockstart, blockoffset, length) {
|
||||
|
||||
document.getElementById('articleContent').innerHTML = htmlArticle;
|
||||
// For testing purpose
|
||||
//document.getElementById('rawArticleContent').value = htmlArticle;
|
||||
//document.getElementById('debugZone').value = htmlArticle;
|
||||
};
|
||||
|
||||
// TODO : should be improved by reading the file chunks by chunks until the article is found,
|
||||
@ -322,3 +320,12 @@ function handleDataFileSelect(evt) {
|
||||
/**
 * Remember the title file chosen by the user.
 *
 * Takes the first entry of the files list exposed by the event target and
 * stores it in the titleFile variable, which is declared outside this function.
 *
 * @param evt Event whose target exposes a "files" list
 */
function handleTitleFileSelect(evt) {
    var selectedFiles = evt.target.files;
    // Only the first file of the selection is kept
    titleFile = selectedFiles[0];
}
|
||||
|
||||
/**
 * Handle the Enter key in the prefix input zone.
 *
 * When the key reported by the event is Enter, a click on the element with
 * id "searchTitles" is triggered, so that pressing Enter in the input has the
 * same effect as clicking that button. Any other key is ignored.
 *
 * @param evt Keyboard event; Enter is detected through evt.key when available,
 *            and through the legacy evt.keyCode (13) otherwise
 */
function onKeyUpPrefix(evt) {
    // evt.keyCode is deprecated : check the standard evt.key first, and keep
    // keyCode as a fallback for browsers that do not provide evt.key
    var isEnterKey = evt.key === "Enter" || evt.keyCode === 13;
    if (isEnterKey) {
        document.getElementById("searchTitles").click();
    }
}
|
||||
|
@ -182,9 +182,10 @@ var diacriticsMap = [
|
||||
];
|
||||
|
||||
|
||||
|
||||
for(var i=0; i<diacriticsMap.length; i++) {
|
||||
|
||||
permalink = stringWithDiacritics.replace(diacriticsMap[i].letters, diacriticsMap[i].base);
|
||||
|
||||
/**
 * Normalize a string by applying every replacement of diacriticsMap to it.
 *
 * For each entry of diacriticsMap (defined outside this function), what matches
 * the entry's "letters" is replaced by the entry's "base". The entries are
 * applied in the order of the array.
 * NOTE(review): "letters" is presumably a global regular expression matching
 * the accented variants of "base" - confirm against the diacriticsMap definition.
 *
 * @param string Text to normalize. null and undefined are treated as an empty
 *               string, other non-string values are converted to a string
 * @returns The text after all the replacements
 */
function normalizeString(string) {
    // Coerce the input, so that a missing or non-string value does not throw on replace()
    var normalizedString = (string === null || string === undefined) ? "" : String(string);
    for (var i = 0; i < diacriticsMap.length; i++) {
        var mapping = diacriticsMap[i];
        normalizedString = normalizedString.replace(mapping.letters, mapping.base);
    }
    return normalizedString;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user