From c598de1ac503158b679913a13249d83e3f6d3980 Mon Sep 17 00:00:00 2001 From: mossroy Date: Fri, 28 Dec 2012 20:31:14 +0100 Subject: [PATCH] Preliminary implementation of a binary search in the titles file. It currently only finds the closest index of the search string, and is probably still buggy --- evopedia-html5/WebContent/evopedia.html | 5 +- evopedia-html5/WebContent/evopedia.js | 72 ++++++- .../WebContent/remove_diacritics.js | 190 ++++++++++++++++++ 3 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 evopedia-html5/WebContent/remove_diacritics.js diff --git a/evopedia-html5/WebContent/evopedia.html b/evopedia-html5/WebContent/evopedia.html index d0200b8a..cfd48183 100644 --- a/evopedia-html5/WebContent/evopedia.html +++ b/evopedia-html5/WebContent/evopedia.html @@ -43,7 +43,9 @@ Please pick the files wikipedia_*.dat from the same dump :

- +Find a title :   +
+
Choose a title :
@@ -57,6 +59,7 @@ Length :

+
 

diff --git a/evopedia-html5/WebContent/evopedia.js b/evopedia-html5/WebContent/evopedia.js index 05474336..9a372e8e 100644 --- a/evopedia-html5/WebContent/evopedia.js +++ b/evopedia-html5/WebContent/evopedia.js @@ -93,8 +93,78 @@ function utf8ByteArrayToString(bytes,startIndex,endIndex) { return out.join(''); }; +/** + * This function is recursively called after each asynchronous read, + * in order to find the index in titleFile closest to the given prefix + */ +function recursivePrefixSearch(titleFile, reader, prefix, lo, hi) { + if (lo < hi-1 ) { + var mid = Math.round((lo+hi)/2); + // TODO : improve the way we read this file : 256 bytes is arbitrary and might be too small + var blob = titleFile.slice(mid,mid+256); + reader.onload = function(e) { + var binaryTitleFile = e.target.result; + var byteArray = new Uint8Array(binaryTitleFile); + // Look for the index of the next NewLine + var newLineIndex=0; + while (newLineIndex