Big refactoring of the code, in a more object-oriented way.

But I could not push it as far as I would like: the FileReader API is asynchronous, which rules out some designs (e.g. a class constructor cannot read files). The title search is not very accurate yet, which is why the links inside articles do not work very well for now.
2025-09-22 12:01:15 -04:00 · 2013-03-15 13:03:48 +01:00 · 2013-03-15 13:03:48 +01:00 · 7802cbf3b8
commit 7802cbf3b8
parent 6c5020b42a
3 changed files with 472 additions and 391 deletions
--- a/www/index.html
+++ b/www/index.html
@ -48,16 +48,16 @@ License:
 	<br />
 	To use it, you have to first download locally a dump from <a href="http://dumpathome.evopedia.info/dumps/finished">http://dumpathome.evopedia.info/dumps/finished</a> (with a Bittorrent client), and select some of the dowloaded files below.
 	<br />
-	Current status : I have tested it with the <a href="http://evopedia.info/dumps/wikipedia_small_2010-08-14.torrent">small dump (2010-08-14)</a>, the <a href="http://evopedia.info/dumps/wikipedia_fr_2012-02-03.torrent">French dump (2012-02-03)</a>, the <a href="http://evopedia.info/dumps/wikipedia_frwiktionary_2011-03-16.torrent">French wiktionary dump (2011-03-16)</a> and the <a href="http://evopedia.info/dumps/wikipedia_en_2012-02-11.torrent">English dump (2012-02-11)</a>
+	I have tested it with the <a href="http://evopedia.info/dumps/wikipedia_small_2010-08-14.torrent">small dump (2010-08-14)</a>, the <a href="http://evopedia.info/dumps/wikipedia_fr_2012-02-03.torrent">French dump (2012-02-03)</a>, the <a href="http://evopedia.info/dumps/wikipedia_frwiktionary_2011-03-16.torrent">French wiktionary dump (2011-03-16)</a> and the <a href="http://evopedia.info/dumps/wikipedia_en_2012-02-11.torrent">English dump (2012-02-11)</a>
 	<br />
 	<br />
 	<ul>
-	<li>On desktops, it works at least on recent Firefox and Chrome</li>
+	<li>On desktops, it works on recent Firefox and Chrome, and maybe on other browsers</li>
 	<li>On the Firefos OS simulator, you have (for now) to put the small dump files in a "fake-sdcard" folder of your firefox profile (ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/profile/fake-sdcard). It looks for wikipedia_small_2010-08-14/titles.idx in it. You also need to install the application from the dashboard of the simulator instead of accessing via the browser (due to security restrictions in Firefox OS : only certified webapps can access the sdcard)</li>
 	<li>I could not test it on a real Firefox OS device : if someone did, please let me know</li>
 	</ul>
 	<br />
-	It's only a proof of concept sor far : there are certainly many many ways this could be enhanced (suggestions and patches are welcome : the source code is on <a href="https://github.com/mossroy/evopedia-html5">github</a>). In particular, the performance can be optimized when reading an article. I also know the links inside an article do not work very well for now.
+	It's only a proof of concept so far : there are many many ways this could be enhanced (suggestions and patches are welcome : the source code is on <a href="https://github.com/mossroy/evopedia-html5">github</a>). In particular, the performance can be optimized when reading an article. I also know the searches are not always very accurate, and the links inside an article do not work well for now.
 	<br />
 	<div id="openLocalFiles" style="display: none;">
 		<br /> Please select the file titles.idx :<br /> <input type="file"
--- a/www/js/app.js
+++ b/www/js/app.js
@ -18,419 +18,200 @@ define(function(require) {
    require('./install-button');

    // Evopedia javascript dependencies
-    var bzip2 = require('bzip2');
-    var remove_diacritics = require('remove_diacritics');
    var evopedia = require('evopedia');


-var dataFiles=document.getElementById('dataFiles').files;
-var titleFile=document.getElementById('titleFile').files[0];
+    var localArchive = null;
+    setLocalArchiveFromFileSelect();

-// Define behavior of HTML elements
-$('#searchTitles').on('click', function(e) {
-searchTitlesFromPrefix(titleFile,$('#prefix').val());
-});
-$('#titleList').on('change', function(e) {
-updateOffsetsFromTitle(this.value);
-});
-$('#toggleDebug').on('click', function(e) {
-switchDebugOnOff();
-});
-$('#readData').on('click', function(e) {
-readArticleFromHtmlForm(dataFiles);
-});
-$('#prefix').on('keyup', function(e) {
-onKeyUpPrefix(e);
-});
+    // Define behavior of HTML elements
+    $('#searchTitles').on('click', function(e) {
+    	searchTitlesFromPrefix($('#prefix').val());
+    });
+    $('#toggleDebug').on('click', function(e) {
+    	switchDebugOnOff();
+    });
+    $('#readData').on('click', function(e) {
+    	findTitleFromTitleIdAndLaunchArticleRead($('#titleList').val());
+    });
+    $('#prefix').on('keyup', function(e) {
+    	onKeyUpPrefix(e);
+    });


-// Detect if DeviceStorage is available
-var storage = null;
-if ($.isFunction(navigator.getDeviceStorage)) {
-	storage = navigator.getDeviceStorage('sdcard');
-}
+    // Detect if DeviceStorage is available
+    var storage = null;
+    if ($.isFunction(navigator.getDeviceStorage)) {
+    	storage = navigator.getDeviceStorage('sdcard');
+    }

-if (storage != null) {
-	var filerequest = storage.get('wikipedia_small_2010-08-14/wikipedia_00.dat');
-	filerequest.onsuccess = function() {
-		dataFiles = [];
-		dataFiles[0] = filerequest.result;
-		filerequest = storage.get('wikipedia_small_2010-08-14/titles.idx');
-		filerequest.onsuccess = function() {
-			titleFile = filerequest.result;
-		};
-		filerequest.onerror = function(event) {
-			alert("error reading title file : " + event.target.error.name);
-		};
-	};
-	filerequest.onerror = function(event) {
-		alert("error reading data file : " + event.target.error.name);
-	};
-}
-else {
-	displayFileSelect();
-}
+    if (storage != null) {
+    	var filerequest = storage.get('wikipedia_small_2010-08-14/wikipedia_00.dat');
+    	filerequest.onsuccess = function() {
+    		localArchive = new evopedia.LocalArchive();
+    		localArchive.dataFiles[0] = filerequest.result;
+    		filerequest = storage.get('wikipedia_small_2010-08-14/titles.idx');
+    		filerequest.onsuccess = function() {
+    			localArchive.titleFile = filerequest.result;
+    		};
+    		filerequest.onerror = function(event) {
+    			alert("error reading title file : " + event.target.error.name);
+    		};
+    	};
+    	filerequest.onerror = function(event) {
+    		alert("error reading data file : " + event.target.error.name);
+    	};
+    }
+    else {
+    	displayFileSelect();
+    }

-/**
- * Displays the zone to select files from the dump
- */
-function displayFileSelect() {
-	$('#openLocalFiles').show();
-	document.getElementById('dataFiles').addEventListener('change', handleDataFileSelect, false);
-	document.getElementById('titleFile').addEventListener('change', handleTitleFileSelect, false);
-}
-
-var debugOn = false;
-
-/**
- * Print the given string inside the debug zone
- * @param string
- */
-function debug(string) {
-	if (debugOn) {
-		document.getElementById("debugTextarea").value+=string+"\n";
+    /**
+	 * Displays the zone to select files from the dump
+	 */
+	function displayFileSelect() {
+		$('#openLocalFiles').show();
+		$('#dataFiles').on('change', setLocalArchiveFromFileSelect);
+		$('#titleFile').on('change', setLocalArchiveFromFileSelect);
 	}
-}

-/**
- * Switch debug mode On/Off
- */
-function switchDebugOnOff() {
-	if (debugOn == true) {
-		debugOn = false;
-		$('#debugZone').hide();
-	}
-	else {
-		debugOn = true;
-		$('#debugZone').show();
-	}
-}
+	var debugOn = false;

-/**
- * Set the Offsets HTML fields from the selected title
- */
-function updateOffsetsFromTitle(selectValue) {
-	var offsets=selectValue.split(/\|/);
-	document.getElementById("filenumber").value=offsets[0];
-	document.getElementById("blockstart").value=offsets[1];
-	document.getElementById("blockoffset").value=offsets[2];
-	document.getElementById("length").value=offsets[3];
-	if (offsets[0]==255) {
-		// It's a redirect : find out the real offsets (asynchronous read)
-		readRedirectOffsets(titleFile,offsets[1]);
-	}
-	else {
-		document.getElementById('redirectfilenumber').value = "";
-		document.getElementById('redirectblockstart').value = "";
-		document.getElementById('redirectblockoffset').value = "";
-		document.getElementById('redirectlength').value = "";
-	}
-}
-
-/**
- * This function is recursively called after each asynchronous read,
- * so that to find the closest index in titleFile to the given prefix
- */
-function recursivePrefixSearch(titleFile, reader, prefix, lo, hi) {
-	if (lo < hi-1 ) {
-		var mid = Math.round((lo+hi)/2);
-		// TODO : improve the way we read this file : 256 bytes is arbitrary and might be too small
-		var blob = titleFile.slice(mid,mid+256);
-		reader.onload = function(e) {
-			var binaryTitleFile = e.target.result;
-			var byteArray = new Uint8Array(binaryTitleFile);
-			// Look for the index of the next NewLine
-			var newLineIndex=0;	
-			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
-				newLineIndex++;
-			}
-			var i = newLineIndex+1;
-			newLineIndex = i+15;
-			// Look for the index of the next NewLine	
-			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
-				newLineIndex++;
-			}
-			var title = evopedia.utf8ByteArrayToString(byteArray,i+15,newLineIndex);
-			debug("title found : "+title);
-			if (title.localeCompare(prefix)<0) {
-				lo = mid;
-			}
-			else {
-				hi = mid;
-			}
-			recursivePrefixSearch(titleFile, reader, prefix, lo, hi);
-		};
-		debug("Reading the file from "+mid+" to "+(mid+256)+" because lo="+lo+" and hi="+hi);			
-		// Read the file as a binary string
-		reader.readAsArrayBuffer(blob);		
-	}
-	else {
-		// We found the closest title
-		debug ("Found the closest title near index "+lo);
-		readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,lo);
-	}
-}
-
-/**
- * Search the index for titles that start with the given prefix
- * (implemented with a binary search inside the index file)
- */
-function searchTitlesFromPrefix(titleFile, prefix) {
-	if (titleFile) {
-		var titleFileSize = titleFile.size;
-		prefix = remove_diacritics.normalizeString(prefix);
-
-		var reader = new FileReader();
-		reader.onerror = errorHandler;
-		reader.onabort = function(e) {
-			alert('Title file read cancelled');
-		};
-		recursivePrefixSearch(titleFile, reader, prefix, 0, titleFileSize);
-	}
-	else {
-		alert ("Title file not set");
-	}
-}
-
-/**
- * Read the real offsets when a redirect was found, based on the redirectIndex provided
- * The file read is asynchronous, and populates the html form as soon as the offsets are found
- * @param titleFile
- * @param redirectIndex
- */
-function readRedirectOffsets(titleFile,redirectIndex) {
-	var reader = new FileReader();
-	reader.onerror = errorHandler;
-	reader.onabort = function(e) {
-		alert('Title file read cancelled');
-	};
-	reader.onload = function(e) {
-		var binaryTitleFile = e.target.result;
-		var byteArray = new Uint8Array(binaryTitleFile);
-		var filenumber = byteArray[2];
-
-		var blockstart = evopedia.readIntegerFrom4Bytes(byteArray,3);
-		var blockoffset = evopedia.readIntegerFrom4Bytes(byteArray,7);
-		var length = evopedia.readIntegerFrom4Bytes(byteArray,11);
-
-		document.getElementById('redirectfilenumber').value = filenumber;
-		document.getElementById('redirectblockstart').value = blockstart;
-		document.getElementById('redirectblockoffset').value = blockoffset;
-		document.getElementById('redirectlength').value = length;
-	};
-	// Read only the 16 necessary bytes
-	var blob = titleFile.slice(redirectIndex,redirectIndex+16);
-	// Read in the file as a binary string
-	reader.readAsArrayBuffer(blob);
-}
-
-/**
- * Read the titles following the given index in the title file, until one of the following conditions is reached :
- * - the title does not start with the prefix anymore
- * - we already read the maximum number of titles
- * and populate the dropdown list
- */
-function readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,startIndex) {
-	var reader = new FileReader();
-	reader.onerror = errorHandler;
-	reader.onabort = function(e) {
-		alert('Title file read cancelled');
-	};
-	reader.onload = function(e) {
-		var binaryTitleFile = e.target.result;
-		var byteArray = new Uint8Array(binaryTitleFile);
-		// Look for the index of the next NewLine
-		var newLineIndex=0;	
-		while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
-			newLineIndex++;
+	/**
+	 * Print the given string inside the debug zone
+	 * 
+	 * @param string
+	 */
+	function debug(string) {
+		if (debugOn) {
+			document.getElementById("debugTextarea").value += string + "\n";
 		}
-		var i = newLineIndex;
-		var titleNumber=0;
+	}
+
+	/**
+	 * Switch debug mode On/Off
+	 */
+	function switchDebugOnOff() {
+		if (debugOn == true) {
+			debugOn = false;
+			$('#debugZone').hide();
+		} else {
+			debugOn = true;
+			$('#debugZone').show();
+		}
+	}
+
+	function setLocalArchiveFromFileSelect() { // Rebuilds localArchive from the current content of the #dataFiles and #titleFile inputs
+		dataFiles=document.getElementById('dataFiles').files; // NOTE(review): no var here, so this assigns an implicit global
+		titleFile=document.getElementById('titleFile').files[0]; // NOTE(review): implicit global too; LocalArchive.prototype.resolveRedirect in evopedia.js reads a bare titleFile, so it appears to depend on this leak - confirm before adding var
+		localArchive = new evopedia.LocalArchive();
+		localArchive.dataFiles = dataFiles;
+		localArchive.titleFile = titleFile; // undefined when no title file has been selected yet
+	}
+
+	/**
+	 * Keyup handler of the prefix input zone : pressing Enter (keyCode 13)
+	 * triggers a click on the #searchTitles button
+	 * 
+	 * @param evt the keyup event
+	 */
+	function onKeyUpPrefix(evt) {
+		if (evt.keyCode == 13) {
+			document.getElementById("searchTitles").click();
+		}
+	}
+
+
+	
+	/**
+	 * Ask the current localArchive for the titles located around the given
+	 * prefix in the index file, and hand the resulting list to
+	 * populateDropDownListOfTitles.
+	 * Shows an alert instead if localArchive has no title file set.
+	 * 
+	 * @param prefix text typed by the user
+	 */
+	function searchTitlesFromPrefix(prefix) {
+		if (localArchive.titleFile) {
+			localArchive.findTitlesWithPrefix(prefix, populateDropDownListOfTitles);
+		} else {
+			alert("Title file not set");
+		}
+	}
+
+	/**
+	 * Populate the drop-down list of titles with the given list
+	 */
+	function populateDropDownListOfTitles(titleList) {
 		var comboTitleList = document.getElementById('titleList');
-		while (i<byteArray.length && titleNumber<50) {
-			// Look for the index of the next NewLine
-			newLineIndex+=15;
-			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
-				newLineIndex++;
-			}
-			
-			// Copy the encodedTitle in a new Array
-			var encodedTitle = new Uint8Array(newLineIndex-i);
-			for (var j = 0; j < newLineIndex-i; j++) {
-				encodedTitle[j] = byteArray[i+j];
-			}
-
-			var title = evopedia.Title.parseTitle(encodedTitle, new evopedia.LocalArchive(), i);
-			
-			// Skip the titles that do not start with the prefix
-			// TODO use a normalizer to compare the strings
-			if (title && title.getReadableName().toLowerCase().indexOf(prefix.toLowerCase())==0) {
-				comboTitleList.options[titleNumber] = new Option (title.name, title.fileNr + "|" + title.blockStart + "|" + title.blockOffset + "|" + title.articleLength);
-				debug("Title : startIndex = " + i + " endIndex = " + newLineIndex + title.toString());
-				titleNumber++;
-			}
-			i=newLineIndex+1;
+		for (var i=0; i<titleList.length; i++) {
+			var title = titleList[i];
+			comboTitleList.options[i] = new Option (title.name, title.toStringId());
 		}
-		// Update the offsets, as if the first item of the list was selected by the user
-		updateOffsetsFromTitle($('#titleList').val());
-	};
-	var blob = titleFile.slice(startIndex);
-	// Read in the file as a binary string
-	reader.readAsArrayBuffer(blob);
-}
+	}


-/**
- * Decompress and read an article in dump files
- */
-function readArticleFromHtmlForm(dataFiles) {
-	document.getElementById("articleContent").innerHTML="Loading article from dump...";
-	if (dataFiles && dataFiles.length>0) {
-		var filenumber = document.getElementById('filenumber').value;
-		var blockstart = document.getElementById('blockstart').value;
-		var blockoffset = document.getElementById('blockoffset').value;
-		var length = document.getElementById('length').value;
-		if (filenumber==255) {
-			// It's a redirect : use redirected offsets
-			filenumber = document.getElementById('redirectfilenumber').value;
-			blockstart = document.getElementById('redirectblockstart').value;
-			blockoffset = document.getElementById('redirectblockoffset').value;
-			length = document.getElementById('redirectlength').value;
-			if (!filenumber || filenumber=="") {
-				// TODO : better handle this case
-				alert("Redirect offsets not read yet");
-			}
-		}
-		var dataFile = null;
-		// Find the good dump file
-		for (var i=0; i<dataFiles.length; i++) {
-			var fileName = dataFiles[i].name;
-			var prefixedFileNumber = "";
-			if (filenumber<10) {
-				prefixedFileNumber = "0"+filenumber;
+	/**
+	 * Creates an instance of title from given titleId (including resolving redirects),
+	 * and call the function to read the corresponding article
+	 */
+	function findTitleFromTitleIdAndLaunchArticleRead(titleId) {
+		$("#articleContent").html("Loading article from dump...");
+		if (localArchive.dataFiles && localArchive.dataFiles.length>0) {
+			var title = evopedia.Title.parseTitleId(localArchive,titleId);
+			if (title.fileNr == 255) {
+				localArchive.resolveRedirect(title, readArticle);
 			}
 			else {
-				prefixedFileNumber = filenumber;
+				readArticle(title);
 			}
-			var expectedFileName = "wikipedia_"+prefixedFileNumber+".dat";
-			// Check if the fileName ends with the expected file name (in case of DeviceStorage usage, the fileName is prefixed by the directory)
-			if (fileName.match(expectedFileName+"$") == expectedFileName) {
-				dataFile = dataFiles[i];
-			}
-		}
-		if (!dataFile) {
-			alert("File number " + filenumber + " not found");
-			document.getElementById("articleContent").innerHTML="";
 		}
 		else {
-			readArticleFromOffset(dataFile, blockstart, blockoffset, length);
+			alert("Data files not set");
 		}
 	}
-	else {
-		alert("Data files not set");
+
+	/**
+	 * Read the article corresponding to the given title
+	 */
+	function readArticle(title) {
+		if ($.isArray(title)) {
+			title = title[0];
+		}
+		localArchive.readArticle(title, displayArticleInForm);
 	}
-}
-
-/**
- * Read an article in a dump file, based on given offsets
- */
-function readArticleFromOffset(dataFile, blockstart, blockoffset, length) {
-
-	var reader = new FileReader();
-	reader.onerror = errorHandler;
-	reader.onabort = function(e) {
-		alert('Data file read cancelled');
-	};
-	reader.onload = function(e) {
-		var compressedArticles = e.target.result;
-		//var htmlArticle = ArchUtils.bz2.decode(compressedArticles);
-		// TODO : should be improved by uncompressing the content chunk by chunk,
-		// until the length is reached, instead of uncompressing everything
-		var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
-		// Start reading at offset, and keep length characters
-		var htmlArticle = htmlArticles.substring(blockoffset,blockoffset+length);
-		// Keep only length characters
-		htmlArticle = htmlArticle.substring(0,length);
-		// Decode UTF-8 encoding
-		htmlArticle = decodeURIComponent(escape(htmlArticle));

+	/**
+	 * Display the the given HTML article in the web page,
+	 * and convert links to javascript calls
+	 */
+	function displayArticleInForm(htmlArticle) {
 		// Display the article inside the web page.		
 		$('#articleContent').html(htmlArticle);
-
+		
 		// Convert links into javascript calls
 		$('#articleContent').find('a').each(function(){
-            // Store current link's url
-            var url = $(this).attr("href");
-            
-            if(url.slice(0, 1) == "#") {
-                // It's an anchor link : do nothing
-            }
-            else if (url.substring(0,4) === "http") {
-            	// It's an external link : do nothing
-            }
-            else {
-            	// It's a link to another article : add an onclick event to go to this article
-            	// instead of following the link
-            	$(this).on('click', function(e) {
-              	   goToArticle($(this).attr("href"));
-              	   return false;
-                 });
-            }
-
-        });
-	};
-
-	// TODO : should be improved by reading the file chunks by chunks until the article is found,
-	// instead of reading the whole file starting at blockstart
-	var blob = dataFile.slice(blockstart);
-
-	// Read in the image file as a binary string.
-	reader.readAsArrayBuffer(blob);
-}
-
-function errorHandler(evt) {
-	switch(evt.target.error.code) {
-	case evt.target.error.NOT_FOUND_ERR:
-		alert('File Not Found!');
-		break;
-	case evt.target.error.NOT_READABLE_ERR:
-		alert('File is not readable');
-		break;
-	case evt.target.error.ABORT_ERR:
-		break; // noop
-	default:
-		alert('An error occurred reading this file.');
-	};
-}
-
-function handleDataFileSelect(evt) {
-	dataFiles = evt.target.files;
-}
-
-function handleTitleFileSelect(evt) {
-	titleFile = evt.target.files[0];
-}
-
-/**
- * Handle Enter key in the prefix input zone
- */
-function onKeyUpPrefix(evt) {
-	if (evt.keyCode == 13) {
-		document.getElementById("searchTitles").click();
+			// Store current link's url
+			var url = $(this).attr("href");
+			
+			if(url.slice(0, 1) == "#") {
+				// It's an anchor link : do nothing
+			}
+			else if (url.substring(0,4) === "http") {
+				// It's an external link : do nothing
+			}
+			else {
+				// It's a link to another article : add an onclick event to go to this article
+				// instead of following the link
+				$(this).on('click', function(e) {
+					goToArticle($(this).attr("href"));
+					return false;
+				});
+			}
+		});
 	}
-}

-/**
- * Replace article content with the one of the given title
- */
-function goToArticle(title) {
-	// This is awful and does not work very well.
-	// It's just temporary before the algorithm is rewritten in an object-oriented way 
-	// TODO : rewrite this with a real article search and display
-	searchTitlesFromPrefix(titleFile,title);
-	updateOffsetsFromTitle($('#titleList').val());
-	document.getElementById("articleContent").innerHTML="";
-}
+
+	/**
+	 * Replace article content with the one of the given title :
+	 * shows a loading message, looks the title up by name in localArchive,
+	 * then passes the result to readArticle.
+	 * 
+	 * @param title name to look for. The click handler installed by
+	 *        displayArticleInForm passes the raw href attribute of the link.
+	 *        NOTE(review): the href is used as-is (no decoding or
+	 *        normalization is done here) - confirm this matches the index
+	 */
+	function goToArticle(title) {
+		$("#articleContent").html("Loading article from dump...");
+		localArchive.getTitleByName(title, readArticle);
+	}

 });

--- a/www/js/lib/evopedia.js
+++ b/www/js/lib/evopedia.js
@ -1,5 +1,9 @@
 define(function(require) {
 	
+	// Module dependencies
+	var remove_diacritics = require('remove_diacritics');
+	var bzip2 = require('bzip2');
+	
 	/**
 	 * Read an integer encoded in 4 bytes
 	 */
@ -37,11 +41,263 @@ define(function(require) {
 	 * It's still minimal for now. TODO : complete implementation to handle maths and coordinates
 	 */
 	function LocalArchive() {
-		this.directory = null;
+		this.dataFiles = new Array();
 		this.titleFile = null;
-		this.date = null;
-		this.language = null;
-	}
+		// TODO to be replaced by the real archive attributes
+		this.date = "2013-03-14";
+		this.language = "zz";
+	};
+	
+	/**
+	 * Binary search, on byte offsets, of the position in titleFile whose title
+	 * is the closest to the given prefix.
+	 * 
+	 * Each step reads 128 bytes starting at the middle of [lo, hi], skips to the
+	 * first NewLine (the read usually starts in the middle of an entry), then
+	 * decodes the title of the following entry : the title text starts 15 bytes
+	 * after the beginning of the entry and ends at the next NewLine.
+	 * The title is compared to the prefix with localeCompare, lo or hi is moved
+	 * to the middle, and the function calls itself again from the onload
+	 * handler of the reader (which is overwritten at every step).
+	 * When lo and hi are adjacent, callbackFunction is called with lo.
+	 * 
+	 * @param reader FileReader reused for all the reads. Its onload handler is replaced by this function
+	 * @param prefix string to look for
+	 * @param lo lower bound (byte offset in titleFile)
+	 * @param hi upper bound (byte offset in titleFile)
+	 * @param callbackFunction called with the offset found (a byte offset, not necessarily the beginning of an entry)
+	 */
+	LocalArchive.prototype.recursivePrefixSearch = function(reader, prefix, lo, hi, callbackFunction) {
+		if (lo < hi-1 ) {
+			var mid = Math.round((lo+hi)/2);
+			// TODO : improve the way we read this file : 128 bytes is arbitrary and might be too small
+			var blob = this.titleFile.slice(mid,mid+128);
+			var currentLocalArchiveInstance = this;
+			reader.onload = function(e) {
+				var binaryTitleFile = e.target.result;
+				var byteArray = new Uint8Array(binaryTitleFile);
+				// Look for the index of the first NewLine : it ends the entry we landed in
+				var newLineIndex=0;	
+				while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
+					newLineIndex++;
+				}
+				var i = newLineIndex+1;
+				newLineIndex = i+15;
+				// Look for the NewLine that ends the title, starting after the 15 first bytes of the entry	
+				while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
+					newLineIndex++;
+				}
+				var title = utf8ByteArrayToString(byteArray,i+15,newLineIndex);
+				if (title.localeCompare(prefix)<0) {
+					lo = mid;
+				}
+				else {
+					hi = mid;
+				}
+				currentLocalArchiveInstance.recursivePrefixSearch(reader, prefix, lo, hi, callbackFunction);
+			};		
+			// Read the slice as an ArrayBuffer : the onload handler above continues the search
+			reader.readAsArrayBuffer(blob);		
+		}
+		else {
+			// We found the closest title at index lo
+			callbackFunction(lo);
+		}
+	};
+	
+	/**
+	 * Look for a title in the title file at the given offset.
+	 * This simply delegates to getTitlesStartingAtOffset with a maximum of 1 title,
+	 * so the callbackFunction receives an Array of Title instances (with at most
+	 * one element), not a bare Title
+	 * @param titleOffset
+	 * @param callbackFunction called with the Array of titles
+	 */
+	LocalArchive.prototype.getTitleAtOffset = function(titleOffset, callbackFunction) {
+		this.getTitlesStartingAtOffset(titleOffset, 1, callbackFunction);
+	};
+	
+	/**
+	 * Read the titles in the title file starting at the given offset (maximum titleCount), and call the callbackFunction with this list of Title instances.
+	 * The file is read from titleOffset up to its end, in a single asynchronous read.
+	 * The first entry parsed is never put in the list (see the TODO below) : the list
+	 * starts with the entry that follows it.
+	 * No filtering is done on the titles : they are returned in the order of the file.
+	 * @param titleOffset byte offset in the title file where the read starts
+	 * @param titleCount maximum number of titles to retrieve
+	 * @param callbackFunction called with an Array of Title instances
+	 */
+	LocalArchive.prototype.getTitlesStartingAtOffset = function(titleOffset, titleCount, callbackFunction) {
+		var reader = new FileReader();
+		reader.onerror = errorHandler;
+		reader.onabort = function(e) {
+			alert('Title file read cancelled');
+		};
+		
+		var currentLocalArchiveInstance = this;
+		reader.onload = function(e) {
+			var binaryTitleFile = e.target.result;
+			var byteArray = new Uint8Array(binaryTitleFile);
+			// Look for the index of the first NewLine (titleOffset may point in the middle of an entry)
+			var newLineIndex=0;	
+			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
+				newLineIndex++;
+			}
+			var i = newLineIndex; // NOTE(review): i is on the NewLine byte itself, not one past it : this may be why the first parsed entry has to be discarded - confirm
+			var titleNumber=-1; // -1 so that the first parsed entry is not stored
+			var titleList = new Array();
+			while (i<byteArray.length && titleNumber<titleCount) {
+				// Look for the NewLine that ends the current entry, skipping the 15 bytes that precede the title text
+				newLineIndex+=15;
+				while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
+					newLineIndex++;
+				}
+				
+				// Copy the encodedTitle in a new Array
+				var encodedTitle = new Uint8Array(newLineIndex-i);
+				for (var j = 0; j < newLineIndex-i; j++) {
+					encodedTitle[j] = byteArray[i+j];
+				}
+
+				var title = Title.parseTitle(encodedTitle, currentLocalArchiveInstance, i); // NOTE(review): i is relative to the beginning of the slice, not to the beginning of the title file - confirm what parseTitle expects
+				
+				// The prefix filtering that used to be done here is disabled (see the commented-out test below)
+				// TODO use a normalizer to compare the strings
+				// TODO see why we need to skip the first title
+				//if (title && title.getReadableName().toLowerCase().indexOf(prefix.toLowerCase())==0) {
+				if (titleNumber>=0) {
+					titleList[titleNumber] = title;
+				}
+				titleNumber++;
+				i=newLineIndex+1;
+			}
+			callbackFunction(titleList);
+		};
+		var blob = this.titleFile.slice(titleOffset);
+		// Read the slice as an ArrayBuffer
+		reader.readAsArrayBuffer(blob);
+	};
+	
+	/**
+	 * Look for a title by its name : runs recursivePrefixSearch on the whole title file,
+	 * then reads the title located at the offset found.
+	 * The callbackFunction receives what getTitleAtOffset provides, i.e. an Array
+	 * containing at most one Title (the closest one found, which is not
+	 * guaranteed to have exactly this name).
+	 * Unlike findTitlesWithPrefix, titleName is not normalized before the search.
+	 * @param titleName
+	 * @param callbackFunction called with the Array of titles
+	 */
+	LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) {
+		var titleFileSize = this.titleFile.size;
+		var reader = new FileReader();
+		reader.onerror = errorHandler;
+		reader.onabort = function(e) {
+			alert('Title file read cancelled');
+		};
+		var currentLocalArchiveInstance = this;
+		this.recursivePrefixSearch(reader, titleName, 0, titleFileSize, function(titleOffset) {
+			currentLocalArchiveInstance.getTitleAtOffset(titleOffset, callbackFunction);
+		});
+	};
+	
+	/**
+	 * Intended to get a random title, and call the callbackFunction with this Title.
+	 * Not implemented yet : the body is empty, so the callbackFunction is never called
+	 * @param callbackFunction
+	 */
+	LocalArchive.prototype.getRandomTitle = function(callbackFunction) {
+		// TODO to be implemented
+	};
+	
+	/**
+	 * Find the position of the title file that is the closest to the given prefix,
+	 * and call the callbackFunction with the list of the (maximum 50) Titles that follow it.
+	 * The prefix (if not empty) is normalized with remove_diacritics before the search.
+	 * The titles returned are consecutive entries of the file : they are not
+	 * filtered, so some of them may not start with the prefix.
+	 * @param prefix
+	 * @param callbackFunction called with an Array of Title instances
+	 */
+	LocalArchive.prototype.findTitlesWithPrefix = function(prefix, callbackFunction) {
+		var titleFileSize = this.titleFile.size;
+		if (prefix) {
+			prefix = remove_diacritics.normalizeString(prefix);
+		}
+
+		var reader = new FileReader();
+		reader.onerror = errorHandler;
+		reader.onabort = function(e) {
+			alert('Title file read cancelled');
+		};
+		var currentLocalArchiveInstance = this;
+		this.recursivePrefixSearch(reader, prefix, 0, titleFileSize, function(titleOffset) {
+			currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, 50, callbackFunction);
+		});
+	};
+	
+	/**
+	 * Read an article from the title instance, and call the callbackFunction with the article HTML String
+	 * @param title
+	 * @param callbackFunction
+	 */
+	LocalArchive.prototype.readArticle = function(title, callbackFunction) {
+		var dataFile = null;
+
+		var prefixedFileNumber = "";
+		if (title.fileNr<10) {
+			prefixedFileNumber = "0" + title.fileNr;
+		}
+		else {
+			prefixedFileNumber = title.fileNr;
+		}
+		var expectedFileName = "wikipedia_"+prefixedFileNumber+".dat";
+
+		// Find the good dump file
+		for (var i=0; i<this.dataFiles.length; i++) {
+			var fileName = this.dataFiles[i].name;
+			// Check if the fileName ends with the expected file name (in case of DeviceStorage usage, the fileName is prefixed by the directory)
+			if (fileName.match(expectedFileName+"$") == expectedFileName) {
+				dataFile = this.dataFiles[i];
+			}
+		}
+		if (!dataFile) {
+			throw "File number " + title.fileNr + " not found";
+		}
+		else {
+			var reader = new FileReader();
+			reader.onerror = errorHandler;
+			reader.onabort = function(e) {
+				alert('Data file read cancelled');
+			};
+			reader.onload = function(e) {
+				var compressedArticles = e.target.result;
+				//var htmlArticle = ArchUtils.bz2.decode(compressedArticles);
+				// TODO : should be improved by uncompressing the content chunk by chunk,
+				// until the length is reached, instead of uncompressing everything
+				var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
+				// Start reading at offset, and keep length characters
+				var htmlArticle = htmlArticles.substring(title.blockOffset,title.blockOffset + title.articleLength);
+				// Keep only length characters
+				htmlArticle = htmlArticle.substring(0,title.articleLength);
+				// Decode UTF-8 encoding
+				htmlArticle = decodeURIComponent(escape(htmlArticle));
+
+				callbackFunction (htmlArticle);
+			};
+
+			// TODO : should be improved by reading the file chunks by chunks until the article is found,
+			// instead of reading the whole file starting at blockstart
+			var blob = dataFile.slice(title.blockStart);
+
+			// Read in the image file as a binary string.
+			reader.readAsArrayBuffer(blob);
+		}
+
+	};
+	
+	/**
+	 * Resolve the redirect of the given title instance, and call the callbackFunction with the redirected Title instance
+	 * @param title
+	 * @param callbackFunction
+	 */
+	LocalArchive.prototype.resolveRedirect = function(title, callbackFunction) {
+		var reader = new FileReader();
+		reader.onerror = errorHandler;
+		reader.onabort = function(e) {
+			alert('Title file read cancelled');
+		};
+		reader.onload = function(e) {
+			var binaryTitleFile = e.target.result;
+			var byteArray = new Uint8Array(binaryTitleFile);
+
+			var redirectedTitle = title;
+			redirectedTitle.fileNr = byteArray[2];
+			redirectedTitle.blockStart = readIntegerFrom4Bytes(byteArray,3);
+			redirectedTitle.blockOffset = readIntegerFrom4Bytes(byteArray,7);
+			redirectedTitle.articleLength = readIntegerFrom4Bytes(byteArray,11);
+
+			callbackFunction(redirectedTitle);
+		};
+		// Read only the 16 necessary bytes, starting at title.blockStart
+		var blob = titleFile.slice(title.blockStart,title.blockStart+16);
+		// Read in the file as a binary string
+		reader.readAsArrayBuffer(blob);
+	};
 	
 	
 	/**
@ -125,13 +381,57 @@ define(function(require) {
 		return utf8ByteArrayToString(encodedTitle, 15, len);
 	};
 	
-	Title.prototype.toStringId = function(){
-		return this.archive.language + "_" + this.archive.date + "_" + this.titleOffset;
+	/**
+	 * Creates a title instance from a serialized id
+	 */
+	Title.parseTitleId = function(localArchive, titleId) {
+			var title = new Title();
+			var idParts = titleId.split("|");
+			title.archive = localArchive;
+			title.fileNr = idParts[2];
+			title.titleOffset = idParts[3];
+			title.name = idParts[4];
+			title.blockStart = idParts[5];
+			title.blockOffset = idParts[6];
+			title.articleLength = idParts[7];
+			return title;
 	};
+	
+	
+	/**
+	 * Serialize the title with its values, separated by "|", in this order :
+	 * language, date (both from the archive), fileNr, titleOffset, name,
+	 * blockStart, blockOffset, articleLength.
+	 * Title.parseTitleId reads the fields by position, so this order must not
+	 * change without updating it too.
+	 * @returns {String}
+	 */
+	Title.prototype.toStringId = function(){
+		return this.archive.language + "|" + this.archive.date + "|" + this.fileNr + "|"
+			+ this.titleOffset + "|" + this.name + "|" + this.blockStart + "|" + this.blockOffset + "|" + this.articleLength ;
+	};
+	
+	/**
+	 * Serialize the title in a readable way
+	 */
 	Title.prototype.toString = function(){
 		return "title.id = " + this.toStringId() + "title.name = " + this.name + " title.fileNr = " + this.fileNr + " title.blockStart = " + this.blockStart + " title.blockOffset = " + this.blockOffset + " title.articleLength = " + this.articleLength;
 	};
 	
+	/**
+	 * ErrorHandler for FileReader
+	 */
+	function errorHandler(evt) {
+		switch(evt.target.error.code) {
+		case evt.target.error.NOT_FOUND_ERR:
+			alert('File Not Found!');
+			break;
+		case evt.target.error.NOT_READABLE_ERR:
+			alert('File is not readable');
+			break;
+		case evt.target.error.ABORT_ERR:
+			break; // noop
+		default:
+			alert('An error occurred reading this file.');
+		};
+	}
+	
 	/**
 	 * Functions and classes exposed by this module
 	 */