Big refactoring of the code, in a more object-oriented way.

But I could not push it as far as I would like: the FileReader API is asynchronous, which rules out some designs (e.g. a class constructor cannot read files). The title search is not very accurate yet, which is why the links between articles do not work very well for now.
2025-09-22 12:01:15 -04:00 · 2013-03-15 13:03:48 +01:00 · 2013-03-15 13:03:48 +01:00 · 7802cbf3b8
commit 7802cbf3b8
parent 6c5020b42a
3 changed files with 472 additions and 391 deletions
--- a/www/index.html
+++ b/www/index.html
@ -48,16 +48,16 @@ License:
 	<br />
 	To use it, you first have to download a dump locally from <a href="http://dumpathome.evopedia.info/dumps/finished">http://dumpathome.evopedia.info/dumps/finished</a> (with a BitTorrent client), and select some of the downloaded files below.
 	<br />
-	Current status : I have tested it with the <a href="http://evopedia.info/dumps/wikipedia_small_2010-08-14.torrent">small dump (2010-08-14)</a>, the <a href="http://evopedia.info/dumps/wikipedia_fr_2012-02-03.torrent">French dump (2012-02-03)</a>, the <a href="http://evopedia.info/dumps/wikipedia_frwiktionary_2011-03-16.torrent">French wiktionary dump (2011-03-16)</a> and the <a href="http://evopedia.info/dumps/wikipedia_en_2012-02-11.torrent">English dump (2012-02-11)</a>
+	I have tested it with the <a href="http://evopedia.info/dumps/wikipedia_small_2010-08-14.torrent">small dump (2010-08-14)</a>, the <a href="http://evopedia.info/dumps/wikipedia_fr_2012-02-03.torrent">French dump (2012-02-03)</a>, the <a href="http://evopedia.info/dumps/wikipedia_frwiktionary_2011-03-16.torrent">French wiktionary dump (2011-03-16)</a> and the <a href="http://evopedia.info/dumps/wikipedia_en_2012-02-11.torrent">English dump (2012-02-11)</a>
 	<br />
 	<br />
 	<ul>
-	<li>On desktops, it works at least on recent Firefox and Chrome</li>
+	<li>On desktops, it works on recent Firefox and Chrome, and maybe on other browsers</li>
 	<li>On the Firefox OS simulator, you have (for now) to put the small dump files in a "fake-sdcard" folder of your firefox profile (ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/profile/fake-sdcard). It looks for wikipedia_small_2010-08-14/titles.idx in it. You also need to install the application from the dashboard of the simulator instead of accessing via the browser (due to security restrictions in Firefox OS : only certified webapps can access the sdcard)</li>
 	<li>I could not test it on a real Firefox OS device : if someone did, please let me know</li>
 	</ul>
 	<br />
-	It's only a proof of concept sor far : there are certainly many many ways this could be enhanced (suggestions and patches are welcome : the source code is on <a href="https://github.com/mossroy/evopedia-html5">github</a>). In particular, the performance can be optimized when reading an article. I also know the links inside an article do not work very well for now.
+	It's only a proof of concept so far : there are many many ways this could be enhanced (suggestions and patches are welcome : the source code is on <a href="https://github.com/mossroy/evopedia-html5">github</a>). In particular, the performance can be optimized when reading an article. I also know the searches are not always very accurate, and the links inside an article do not work well for now.
 	<br />
 	<div id="openLocalFiles" style="display: none;">
 		<br /> Please select the file titles.idx :<br /> <input type="file"
--- a/www/js/app.js
+++ b/www/js/app.js
@ -18,419 +18,200 @@ define(function(require) {
    require('./install-button');
    // Evopedia javascript dependencies
    var bzip2 = require('bzip2');
    var remove_diacritics = require('remove_diacritics');
    var evopedia = require('evopedia');
-var dataFiles=document.getElementById('dataFiles').files;
+    var localArchive = null;
-var titleFile=document.getElementById('titleFile').files[0];
+    setLocalArchiveFromFileSelect();
-// Define behavior of HTML elements
+    // Define behavior of HTML elements
-$('#searchTitles').on('click', function(e) {
+    $('#searchTitles').on('click', function(e) {
-searchTitlesFromPrefix(titleFile,$('#prefix').val());
+    	searchTitlesFromPrefix($('#prefix').val());
-});
+    });
-$('#titleList').on('change', function(e) {
+    $('#toggleDebug').on('click', function(e) {
-updateOffsetsFromTitle(this.value);
+    	switchDebugOnOff();
-});
+    });
-$('#toggleDebug').on('click', function(e) {
+    $('#readData').on('click', function(e) {
-switchDebugOnOff();
+    	findTitleFromTitleIdAndLaunchArticleRead($('#titleList').val());
-});
+    });
-$('#readData').on('click', function(e) {
+    $('#prefix').on('keyup', function(e) {
-readArticleFromHtmlForm(dataFiles);
+    	onKeyUpPrefix(e);
-});
+    });
 $('#prefix').on('keyup', function(e) {
 onKeyUpPrefix(e);
 });
-// Detect if DeviceStorage is available
+    // Detect if DeviceStorage is available
-var storage = null;
+    var storage = null;
-if ($.isFunction(navigator.getDeviceStorage)) {
+    if ($.isFunction(navigator.getDeviceStorage)) {
-	storage = navigator.getDeviceStorage('sdcard');
+    	storage = navigator.getDeviceStorage('sdcard');
-}
+    }
-if (storage != null) {
+    if (storage != null) {
-	var filerequest = storage.get('wikipedia_small_2010-08-14/wikipedia_00.dat');
+    	var filerequest = storage.get('wikipedia_small_2010-08-14/wikipedia_00.dat');
-	filerequest.onsuccess = function() {
+    	filerequest.onsuccess = function() {
-		dataFiles = [];
+    		localArchive = new evopedia.LocalArchive();
-		dataFiles[0] = filerequest.result;
+    		localArchive.dataFiles[0] = filerequest.result;
-		filerequest = storage.get('wikipedia_small_2010-08-14/titles.idx');
+    		filerequest = storage.get('wikipedia_small_2010-08-14/titles.idx');
-		filerequest.onsuccess = function() {
+    		filerequest.onsuccess = function() {
-			titleFile = filerequest.result;
+    			localArchive.titleFile = filerequest.result;
-		};
+    		};
-		filerequest.onerror = function(event) {
+    		filerequest.onerror = function(event) {
-			alert("error reading title file : " + event.target.error.name);
+    			alert("error reading title file : " + event.target.error.name);
-		};
+    		};
-	};
+    	};
-	filerequest.onerror = function(event) {
+    	filerequest.onerror = function(event) {
-		alert("error reading data file : " + event.target.error.name);
+    		alert("error reading data file : " + event.target.error.name);
-	};
+    	};
-}
+    }
-else {
+    else {
-	displayFileSelect();
+    	displayFileSelect();
-}
+    }
-/**
+    /**
- * Displays the zone to select files from the dump
+	 * Displays the zone to select files from the dump
- */
+	 */
-function displayFileSelect() {
+	function displayFileSelect() {
-	$('#openLocalFiles').show();
+		$('#openLocalFiles').show();
-	document.getElementById('dataFiles').addEventListener('change', handleDataFileSelect, false);
+		$('#dataFiles').on('change', setLocalArchiveFromFileSelect);
-	document.getElementById('titleFile').addEventListener('change', handleTitleFileSelect, false);
+		$('#titleFile').on('change', setLocalArchiveFromFileSelect);
 }
 var debugOn = false;
 /**
 * Append a line of text to the debug textarea.
 * Nothing happens while the debugOn flag is off.
 * @param string text to append (a newline is added after it)
 */
 function debug(string) {
 	if (!debugOn) {
 		return;
 	}
 	var debugTextarea = document.getElementById("debugTextarea");
 	debugTextarea.value += string + "\n";
 }
-/**
+	var debugOn = false;
 * Switch debug mode On/Off
 */
 function switchDebugOnOff() {
 	// Loose comparison kept on purpose : only values equal to true count as "on"
 	var wasOn = (debugOn == true);
 	debugOn = !wasOn;
 	var debugZone = $('#debugZone');
 	if (wasOn) {
 		debugZone.hide();
 	}
 	else {
 		debugZone.show();
 	}
 }
-/**
+	/**
- * Set the Offsets HTML fields from the selected title
+	 * Print the given string inside the debug zone
- */
+	 * 
-function updateOffsetsFromTitle(selectValue) {
+	 * @param string
-	var offsets=selectValue.split(/\|/);
+	 */
-	document.getElementById("filenumber").value=offsets[0];
+	function debug(string) {
-	document.getElementById("blockstart").value=offsets[1];
+		if (debugOn) {
-	document.getElementById("blockoffset").value=offsets[2];
+			document.getElementById("debugTextarea").value += string + "\n";
 	document.getElementById("length").value=offsets[3];
 	if (offsets[0]==255) {
 		// It's a redirect : find out the real offsets (asynchronous read)
 		readRedirectOffsets(titleFile,offsets[1]);
 	}
 	else {
 		document.getElementById('redirectfilenumber').value = "";
 		document.getElementById('redirectblockstart').value = "";
 		document.getElementById('redirectblockoffset').value = "";
 		document.getElementById('redirectlength').value = "";
 	}
 }
 /**
 * Binary search inside the title file, driven by asynchronous reads :
 * each step reads 256 bytes at the middle of [lo,hi], and the onload handler
 * calls this function again with the narrowed interval.
 * When the interval cannot be narrowed any more, the titles are read from
 * index lo with readTitlesBeginningAtIndexStartingWithPrefix.
 * @param titleFile file (or blob) holding the title index
 * @param reader FileReader reused for every step (its onload is overwritten each time)
 * @param prefix string the titles are compared to (with localeCompare)
 * @param lo lower bound (byte index in titleFile)
 * @param hi upper bound (byte index in titleFile)
 */
 function recursivePrefixSearch(titleFile, reader, prefix, lo, hi) {
 	if (lo < hi-1 ) {
 		var mid = Math.round((lo+hi)/2);
 		// TODO : improve the way we read this file : 256 bytes is arbitrary and might be too small
 		var blob = titleFile.slice(mid,mid+256);
 		reader.onload = function(e) {
 			var binaryTitleFile = e.target.result;
 			var byteArray = new Uint8Array(binaryTitleFile);
 			// mid probably falls in the middle of an entry : skip to the first NewLine (byte 10)
 			var newLineIndex=0;	
 			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 				newLineIndex++;
 			}
 			// i is the first byte after that NewLine ; the title text is read 15 bytes further
 			var i = newLineIndex+1;
 			newLineIndex = i+15;
 			// Look for the NewLine that ends this title
 			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 				newLineIndex++;
 			}
 			var title = evopedia.utf8ByteArrayToString(byteArray,i+15,newLineIndex);
 			debug("title found : "+title);
 			// Keep the half of the interval that may contain the prefix
 			if (title.localeCompare(prefix)<0) {
 				lo = mid;
 			}
 			else {
 				hi = mid;
 			}
 			recursivePrefixSearch(titleFile, reader, prefix, lo, hi);
 		};
 		debug("Reading the file from "+mid+" to "+(mid+256)+" because lo="+lo+" and hi="+hi);			
 		// Start the asynchronous read (the result is an ArrayBuffer, handled by onload above)
 		reader.readAsArrayBuffer(blob);		
 	}
 	else {
 		// We found the closest title
 		debug ("Found the closest title near index "+lo);
 		readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,lo);
 	}
 }
 /**
 * Look in the title index for the titles starting with the given prefix.
 * The prefix is normalized, then a binary search is started in the title file
 * (see recursivePrefixSearch). Alerts the user when no title file is set.
 */
 function searchTitlesFromPrefix(titleFile, prefix) {
 	if (!titleFile) {
 		alert ("Title file not set");
 		return;
 	}
 	var upperBound = titleFile.size;
 	var normalizedPrefix = remove_diacritics.normalizeString(prefix);
 	var titleReader = new FileReader();
 	titleReader.onerror = errorHandler;
 	titleReader.onabort = function(e) {
 		alert('Title file read cancelled');
 	};
 	recursivePrefixSearch(titleFile, titleReader, normalizedPrefix, 0, upperBound);
 }
 /**
 * Read the real offsets when a redirect was found, based on the redirectIndex provided
 * The file read is asynchronous, and populates the html form as soon as the offsets are found
 * (fields redirectfilenumber, redirectblockstart, redirectblockoffset and redirectlength)
 * @param titleFile file (or blob) holding the title index
 * @param redirectIndex byte index of the target entry in titleFile ; a number,
 *        or a string holding a number (ex : a piece of a form value)
 */
 function readRedirectOffsets(titleFile,redirectIndex) {
 	// Convert once : with a string, redirectIndex+16 would concatenate ("100"+16 = "10016")
 	// instead of adding, and far more than 16 bytes would be read
 	var startIndex = parseInt(redirectIndex, 10);
 	var reader = new FileReader();
 	reader.onerror = errorHandler;
 	reader.onabort = function(e) {
 		alert('Title file read cancelled');
 	};
 	reader.onload = function(e) {
 		var byteArray = new Uint8Array(e.target.result);
 		// Layout used here : file number in byte 2, then three 4-bytes integers at 3, 7 and 11
 		var filenumber = byteArray[2];
 		var blockstart = evopedia.readIntegerFrom4Bytes(byteArray,3);
 		var blockoffset = evopedia.readIntegerFrom4Bytes(byteArray,7);
 		var length = evopedia.readIntegerFrom4Bytes(byteArray,11);
 		document.getElementById('redirectfilenumber').value = filenumber;
 		document.getElementById('redirectblockstart').value = blockstart;
 		document.getElementById('redirectblockoffset').value = blockoffset;
 		document.getElementById('redirectlength').value = length;
 	};
 	// Read only the 16 necessary bytes
 	var blob = titleFile.slice(startIndex,startIndex+16);
 	// Start the asynchronous read (the result is an ArrayBuffer, handled by onload above)
 	reader.readAsArrayBuffer(blob);
 }
 /**
 * Read the titles following the given index in the title file, until one of the following conditions is reached :
 * - the title does not start with the prefix anymore
 * - we already read the maximum number of titles
 * and populate the dropdown list
 */
 function readTitlesBeginningAtIndexStartingWithPrefix(titleFile,prefix,startIndex) {
 	var reader = new FileReader();
 	reader.onerror = errorHandler;
 	reader.onabort = function(e) {
 		alert('Title file read cancelled');
 	};
 	reader.onload = function(e) {
 		var binaryTitleFile = e.target.result;
 		var byteArray = new Uint8Array(binaryTitleFile);
 		// Look for the index of the next NewLine
 		var newLineIndex=0;	
 		while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 			newLineIndex++;
 		}
-		var i = newLineIndex;
+	}
-		var titleNumber=0;
+
 	/**
 	 * Toggle the debug mode : flips the debugOn flag, then hides (when turning off)
 	 * or shows (when turning on) the #debugZone element
 	 */
 	function switchDebugOnOff() {
 		var turningOff = (debugOn == true);
 		debugOn = !turningOff;
 		if (turningOff) {
 			$('#debugZone').hide();
 			return;
 		}
 		$('#debugZone').show();
 	}
 	/**
 	 * Replace localArchive by a new LocalArchive built from the files currently
 	 * selected in the #dataFiles and #titleFile inputs.
 	 * The selected files are stored on the archive only : nothing is assigned to
 	 * undeclared variables (that would create globals, and throw in strict mode).
 	 */
 	function setLocalArchiveFromFileSelect() {
 		var selectedArchive = new evopedia.LocalArchive();
 		selectedArchive.dataFiles = document.getElementById('dataFiles').files;
 		// Only one title file can be used : keep the first selected one
 		selectedArchive.titleFile = document.getElementById('titleFile').files[0];
 		localArchive = selectedArchive;
 	}
 	/**
 	 * Keyup handler of the prefix input zone : pressing Enter (key code 13)
 	 * triggers a click on the #searchTitles element
 	 */
 	function onKeyUpPrefix(evt) {
 		var ENTER_KEY_CODE = 13;
 		if (evt.keyCode != ENTER_KEY_CODE) {
 			return;
 		}
 		var searchButton = document.getElementById("searchTitles");
 		searchButton.click();
 	}
 	/**
 	 * Ask the local archive for the titles starting with the given prefix ;
 	 * the result is handed over to populateDropDownListOfTitles.
 	 * Alerts the user when the archive has no title file.
 	 */
 	function searchTitlesFromPrefix(prefix) {
 		if (!localArchive.titleFile) {
 			alert("Title file not set");
 			return;
 		}
 		localArchive.findTitlesWithPrefix(prefix, populateDropDownListOfTitles);
 	}
 	/**
 	 * Populate the drop-down list of titles with the given list
 	 */
 	function populateDropDownListOfTitles(titleList) {
 		var comboTitleList = document.getElementById('titleList');
-		while (i<byteArray.length && titleNumber<50) {
+		for (var i=0; i<titleList.length; i++) {
-			// Look for the index of the next NewLine
+			var title = titleList[i];
-			newLineIndex+=15;
+			comboTitleList.options[i] = new Option (title.name, title.toStringId());
 			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 				newLineIndex++;
 			}
 			// Copy the encodedTitle in a new Array
 			var encodedTitle = new Uint8Array(newLineIndex-i);
 			for (var j = 0; j < newLineIndex-i; j++) {
 				encodedTitle[j] = byteArray[i+j];
 			}
 			var title = evopedia.Title.parseTitle(encodedTitle, new evopedia.LocalArchive(), i);
 			// Skip the titles that do not start with the prefix
 			// TODO use a normalizer to compare the strings
 			if (title && title.getReadableName().toLowerCase().indexOf(prefix.toLowerCase())==0) {
 				comboTitleList.options[titleNumber] = new Option (title.name, title.fileNr + "|" + title.blockStart + "|" + title.blockOffset + "|" + title.articleLength);
 				debug("Title : startIndex = " + i + " endIndex = " + newLineIndex + title.toString());
 				titleNumber++;
 			}
 			i=newLineIndex+1;
 		}
-		// Update the offsets, as if the first item of the list was selected by the user
+	}
 		updateOffsetsFromTitle($('#titleList').val());
 	};
 	var blob = titleFile.slice(startIndex);
 	// Read in the file as a binary string
 	reader.readAsArrayBuffer(blob);
 }
-/**
+	/**
- * Decompress and read an article in dump files
+	 * Creates an instance of title from given titleId (including resolving redirects),
- */
+	 * and call the function to read the corresponding article
-function readArticleFromHtmlForm(dataFiles) {
+	 */
-	document.getElementById("articleContent").innerHTML="Loading article from dump...";
+	function findTitleFromTitleIdAndLaunchArticleRead(titleId) {
-	if (dataFiles && dataFiles.length>0) {
+		$("#articleContent").html("Loading article from dump...");
-		var filenumber = document.getElementById('filenumber').value;
+		if (localArchive.dataFiles && localArchive.dataFiles.length>0) {
-		var blockstart = document.getElementById('blockstart').value;
+			var title = evopedia.Title.parseTitleId(localArchive,titleId);
-		var blockoffset = document.getElementById('blockoffset').value;
+			if (title.fileNr == 255) {
-		var length = document.getElementById('length').value;
+				localArchive.resolveRedirect(title, readArticle);
 		if (filenumber==255) {
 			// It's a redirect : use redirected offsets
 			filenumber = document.getElementById('redirectfilenumber').value;
 			blockstart = document.getElementById('redirectblockstart').value;
 			blockoffset = document.getElementById('redirectblockoffset').value;
 			length = document.getElementById('redirectlength').value;
 			if (!filenumber || filenumber=="") {
 				// TODO : better handle this case
 				alert("Redirect offsets not read yet");
 			}
 		}
 		var dataFile = null;
 		// Find the good dump file
 		for (var i=0; i<dataFiles.length; i++) {
 			var fileName = dataFiles[i].name;
 			var prefixedFileNumber = "";
 			if (filenumber<10) {
 				prefixedFileNumber = "0"+filenumber;
 			}
 			else {
-				prefixedFileNumber = filenumber;
+				readArticle(title);
 			}
 			var expectedFileName = "wikipedia_"+prefixedFileNumber+".dat";
 			// Check if the fileName ends with the expected file name (in case of DeviceStorage usage, the fileName is prefixed by the directory)
 			if (fileName.match(expectedFileName+"$") == expectedFileName) {
 				dataFile = dataFiles[i];
 			}
 		}
 		if (!dataFile) {
 			alert("File number " + filenumber + " not found");
 			document.getElementById("articleContent").innerHTML="";
 		}
 		else {
-			readArticleFromOffset(dataFile, blockstart, blockoffset, length);
+			alert("Data files not set");
 		}
 	}
-	else {
+
-		alert("Data files not set");
+	/**
 	 * Read the article corresponding to the given title
 	 */
 	function readArticle(title) {
 		// A list of titles may be given instead of a single one : only the first is read
 		var titleToRead = $.isArray(title) ? title[0] : title;
 		// The HTML of the article is handed over to displayArticleInForm
 		localArchive.readArticle(titleToRead, displayArticleInForm);
 	}
 }
 /**
 * Read an article in a dump file, based on given offsets
 */
 function readArticleFromOffset(dataFile, blockstart, blockoffset, length) {
 	var reader = new FileReader();
 	reader.onerror = errorHandler;
 	reader.onabort = function(e) {
 		alert('Data file read cancelled');
 	};
 	reader.onload = function(e) {
 		var compressedArticles = e.target.result;
 		//var htmlArticle = ArchUtils.bz2.decode(compressedArticles);
 		// TODO : should be improved by uncompressing the content chunk by chunk,
 		// until the length is reached, instead of uncompressing everything
 		var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
 		// Start reading at offset, and keep length characters
 		var htmlArticle = htmlArticles.substring(blockoffset,blockoffset+length);
 		// Keep only length characters
 		htmlArticle = htmlArticle.substring(0,length);
 		// Decode UTF-8 encoding
 		htmlArticle = decodeURIComponent(escape(htmlArticle));
 	/**
 	 * Display the the given HTML article in the web page,
 	 * and convert links to javascript calls
 	 */
 	function displayArticleInForm(htmlArticle) {
 		// Display the article inside the web page.		
 		$('#articleContent').html(htmlArticle);
 		// Convert links into javascript calls
 		$('#articleContent').find('a').each(function(){
-            // Store current link's url
+			// Store current link's url
-            var url = $(this).attr("href");
+			var url = $(this).attr("href");
-            if(url.slice(0, 1) == "#") {
+			if(url.slice(0, 1) == "#") {
-                // It's an anchor link : do nothing
+				// It's an anchor link : do nothing
-            }
+			}
-            else if (url.substring(0,4) === "http") {
+			else if (url.substring(0,4) === "http") {
-            	// It's an external link : do nothing
+				// It's an external link : do nothing
-            }
+			}
-            else {
+			else {
-            	// It's a link to another article : add an onclick event to go to this article
+				// It's a link to another article : add an onclick event to go to this article
-            	// instead of following the link
+				// instead of following the link
-            	$(this).on('click', function(e) {
+				$(this).on('click', function(e) {
-              	   goToArticle($(this).attr("href"));
+					goToArticle($(this).attr("href"));
-              	   return false;
+					return false;
-                 });
+				});
-            }
+			}
-
+		});
        });
 	};
 	// TODO : should be improved by reading the file chunks by chunks until the article is found,
 	// instead of reading the whole file starting at blockstart
 	var blob = dataFile.slice(blockstart);
 	// Read in the image file as a binary string.
 	reader.readAsArrayBuffer(blob);
 }
 function errorHandler(evt) {
 	// Error handler of the FileReader instances : tells the user what went wrong,
 	// except for aborted reads, which stay silent here
 	var fileError = evt.target.error;
 	var code = fileError.code;
 	if (code === fileError.NOT_FOUND_ERR) {
 		alert('File Not Found!');
 	}
 	else if (code === fileError.NOT_READABLE_ERR) {
 		alert('File is not readable');
 	}
 	else if (code === fileError.ABORT_ERR) {
 		// noop
 	}
 	else {
 		alert('An error occurred reading this file.');
 	}
 }
 /**
 * Event handler : store the files of the event target (evt.target.files)
 * in the dataFiles variable, which is not declared inside this function
 */
 function handleDataFileSelect(evt) {
 	dataFiles = evt.target.files;
 }
 /**
 * Event handler : store the first file of the event target (evt.target.files[0])
 * in the titleFile variable, which is not declared inside this function
 */
 function handleTitleFileSelect(evt) {
 	titleFile = evt.target.files[0];
 }
 /**
 * Keyup handler of the prefix input zone :
 * the Enter key (key code 13) acts as a click on the #searchTitles element
 */
 function onKeyUpPrefix(evt) {
 	var enterPressed = (evt.keyCode == 13);
 	if (!enterPressed) {
 		return;
 	}
 	document.getElementById("searchTitles").click();
 }
-/**
+
- * Replace article content with the one of the given title
+	/**
- */
+	 * Replace article content with the one of the given title
-function goToArticle(title) {
+	 */
-	// This is awful and does not work very well.
+	function goToArticle(title) {
-	// It's just temporary before the algorithm is rewritten in an object-oriented way 
+		$("#articleContent").html("Loading article from dump...");
-	// TODO : rewrite this with a real article search and display
+		localArchive.getTitleByName(title, readArticle);
-	searchTitlesFromPrefix(titleFile,title);
+	}
 	updateOffsetsFromTitle($('#titleList').val());
 	document.getElementById("articleContent").innerHTML="";
 }
 });
--- a/www/js/lib/evopedia.js
+++ b/www/js/lib/evopedia.js
@ -1,5 +1,9 @@
 define(function(require) {
 	// Module dependencies
 	var remove_diacritics = require('remove_diacritics');
 	var bzip2 = require('bzip2');
 	/**
 	 * Read an integer encoded in 4 bytes
 	 */
@ -37,11 +41,263 @@ define(function(require) {
 	 * It's still minimal for now. TODO : complete implementation to handle maths and coordinates
 	 */
 	function LocalArchive() {
-		this.directory = null;
+		this.dataFiles = new Array();
 		this.titleFile = null;
-		this.date = null;
+		// TODO to be replaced by the real archive attributes
-		this.language = null;
+		this.date = "2013-03-14";
-	}
+		this.language = "zz";
 	};
 	/**
 	 * This function is recursively called after each asynchronous read,
 	 * so that to find the closest index in titleFile to the given prefix
 	 * When found, call the callbackFunction with the index
 	 * @param reader
 	 * @param prefix
 	 * @param lo
 	 * @param hi
 	 * @param callbackFunction
 	 */
 	LocalArchive.prototype.recursivePrefixSearch = function(reader, prefix, lo, hi, callbackFunction) {
 		if (lo < hi-1 ) {
 			var mid = Math.round((lo+hi)/2);
 			// TODO : improve the way we read this file : 128 bytes is arbitrary and might be too small
 			var blob = this.titleFile.slice(mid,mid+128);
 			var currentLocalArchiveInstance = this;
 			reader.onload = function(e) {
 				var binaryTitleFile = e.target.result;
 				var byteArray = new Uint8Array(binaryTitleFile);
 				// Look for the index of the next NewLine
 				var newLineIndex=0;	
 				while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 					newLineIndex++;
 				}
 				var i = newLineIndex+1;
 				newLineIndex = i+15;
 				// Look for the index of the next NewLine	
 				while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 					newLineIndex++;
 				}
 				var title = utf8ByteArrayToString(byteArray,i+15,newLineIndex);
 				if (title.localeCompare(prefix)<0) {
 					lo = mid;
 				}
 				else {
 					hi = mid;
 				}
 				currentLocalArchiveInstance.recursivePrefixSearch(reader, prefix, lo, hi, callbackFunction);
 			};		
 			// Read the file as a binary string
 			reader.readAsArrayBuffer(blob);		
 		}
 		else {
 			// We found the closest title at index lo
 			callbackFunction(lo);
 		}
 	};
 	/**
 	 * Look for the title located at the given offset of the title file.
 	 * Delegates to getTitlesStartingAtOffset with a maximum of one title, so
 	 * callbackFunction receives whatever that method passes to its callback
 	 * (NOTE : presumably a list holding the title rather than a bare Title - to be confirmed)
 	 * @param titleOffset
 	 * @param callbackFunction
 	 */
 	LocalArchive.prototype.getTitleAtOffset = function(titleOffset, callbackFunction) {
 		var SINGLE_TITLE = 1;
 		this.getTitlesStartingAtOffset(titleOffset, SINGLE_TITLE, callbackFunction);
 	};
 	/**
 	 * Read the titles in the title file starting at the given offset (maximum titleCount), and call the callbackFunction with this list of Title instances
 	 * The read is asynchronous : the whole end of the title file (from titleOffset) is loaded,
 	 * the entries are split on NewLine bytes (10), and each entry is parsed with Title.parseTitle.
 	 * The first parsed entry is never put in the list (see the TODO below).
 	 * @param titleOffset byte index in the title file where the read starts
 	 * @param titleCount maximum number of titles to retrieve
 	 * @param callbackFunction called once, with the array of parsed titles
 	 */
 	LocalArchive.prototype.getTitlesStartingAtOffset = function(titleOffset, titleCount, callbackFunction) {
 		var reader = new FileReader();
 		reader.onerror = errorHandler;
 		reader.onabort = function(e) {
 			alert('Title file read cancelled');
 		};
 		// Keep a reference to this instance for the onload handler below
 		var currentLocalArchiveInstance = this;
 		reader.onload = function(e) {
 			var binaryTitleFile = e.target.result;
 			var byteArray = new Uint8Array(binaryTitleFile);
 			// Look for the index of the next NewLine
 			var newLineIndex=0;	
 			while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 				newLineIndex++;
 			}
 			// i starts ON the NewLine byte itself (not after it), so the first entry
 			// copied below begins with that NewLine
 			var i = newLineIndex;
 			// -1 so that the first parsed entry is skipped : list indexes start at the second one
 			var titleNumber=-1;
 			var titleList = new Array();
 			while (i<byteArray.length && titleNumber<titleCount) {
 				// Look for the index of the next NewLine, searching from 15 bytes after the previous one
 				newLineIndex+=15;
 				while (newLineIndex<byteArray.length && byteArray[newLineIndex]!=10) {
 					newLineIndex++;
 				}
 				// Copy the encodedTitle in a new Array
 				var encodedTitle = new Uint8Array(newLineIndex-i);
 				for (var j = 0; j < newLineIndex-i; j++) {
 					encodedTitle[j] = byteArray[i+j];
 				}
 				// NOTE(review) : i is an index inside the bytes read from titleOffset, not inside
 				// the whole title file - confirm this is what parseTitle expects as third argument
 				var title = Title.parseTitle(encodedTitle, currentLocalArchiveInstance, i);
 				// Skip the titles that do not start with the prefix
 				// TODO use a normalizer to compare the strings
 				// TODO see why we need to skip the first title
 				//if (title && title.getReadableName().toLowerCase().indexOf(prefix.toLowerCase())==0) {
 				if (titleNumber>=0) {
 					titleList[titleNumber] = title;
 				}
 				titleNumber++;
 				// Next entry starts right after the NewLine that ended this one
 				i=newLineIndex+1;
 			}
 			callbackFunction(titleList);
 		};
 		// No end index : everything from titleOffset to the end of the file is read
 		var blob = this.titleFile.slice(titleOffset);
 		// Start the asynchronous read (the result is an ArrayBuffer, handled by onload above)
 		reader.readAsArrayBuffer(blob);
 	};
 	/**
 	 * Look for a title by its name : a binary search (recursivePrefixSearch) is run on the
 	 * whole title file, then the offset it ends on is given to getTitleAtOffset,
 	 * which is in charge of calling the callbackFunction
 	 * @param titleName
 	 * @param callbackFunction
 	 */
 	LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) {
 		var self = this;
 		var upperBound = this.titleFile.size;
 		var titleReader = new FileReader();
 		titleReader.onerror = errorHandler;
 		titleReader.onabort = function(e) {
 			alert('Title file read cancelled');
 		};
 		function onOffsetFound(titleOffset) {
 			self.getTitleAtOffset(titleOffset, callbackFunction);
 		}
 		this.recursivePrefixSearch(titleReader, titleName, 0, upperBound, onOffsetFound);
 	};
 	/**
 	 * Get a random title, and call the callbackFunction with this Title
 	 * NOTE : not implemented yet - the body is empty, so for now the callbackFunction is never called
 	 * @param callbackFunction
 	 */
 	LocalArchive.prototype.getRandomTitle = function(callbackFunction) {
 		// TODO to be implemented
 	};
 	/**
 	 * Find titles from the given prefix : the prefix is normalized (when not empty), a binary
 	 * search (recursivePrefixSearch) is run on the whole title file, then up to 50 titles are
 	 * read from the offset it ends on, and given to the callbackFunction
 	 * @param prefix
 	 * @param callbackFunction
 	 */
 	LocalArchive.prototype.findTitlesWithPrefix = function(prefix, callbackFunction) {
 		var MAX_TITLES = 50;
 		var self = this;
 		var upperBound = this.titleFile.size;
 		var searchedPrefix = prefix ? remove_diacritics.normalizeString(prefix) : prefix;
 		var titleReader = new FileReader();
 		titleReader.onerror = errorHandler;
 		titleReader.onabort = function(e) {
 			alert('Title file read cancelled');
 		};
 		function onOffsetFound(titleOffset) {
 			self.getTitlesStartingAtOffset(titleOffset, MAX_TITLES, callbackFunction);
 		}
 		this.recursivePrefixSearch(titleReader, searchedPrefix, 0, upperBound, onOffsetFound);
 	};
 	/**
 	 * Read an article from the title instance, and call the callbackFunction with the article HTML String
 	 * The data file is chosen from title.fileNr (wikipedia_NN.dat), read from title.blockStart to its end,
 	 * decompressed with bzip2, then the article is cut out at title.blockOffset, on title.articleLength
 	 * characters, and its UTF-8 encoding is decoded.
 	 * @param title Title instance (fileNr, blockStart, blockOffset and articleLength may be numbers
 	 *        or strings holding numbers)
 	 * @param callbackFunction called with the HTML string of the article
 	 * @throws Error if no data file of this archive matches title.fileNr
 	 */
 	LocalArchive.prototype.readArticle = function(title, callbackFunction) {
 		// Convert the fields once : when they are strings, blockOffset + articleLength
 		// would concatenate instead of adding
 		var fileNr = Number(title.fileNr);
 		var blockStart = Number(title.blockStart);
 		var blockOffset = Number(title.blockOffset);
 		var articleLength = Number(title.articleLength);
 		// File numbers are written on 2 digits in the names of the data files
 		var prefixedFileNumber = (fileNr < 10 ? "0" : "") + fileNr;
 		var expectedFileName = "wikipedia_" + prefixedFileNumber + ".dat";
 		// Find the good dump file
 		var dataFile = null;
 		for (var i=0; i<this.dataFiles.length; i++) {
 			var fileName = this.dataFiles[i].name;
 			// Check if the fileName ends with the expected file name (in case of DeviceStorage usage, the fileName is prefixed by the directory)
 			if (fileName.slice(-expectedFileName.length) === expectedFileName) {
 				dataFile = this.dataFiles[i];
 			}
 		}
 		if (!dataFile) {
 			throw new Error("File number " + title.fileNr + " not found");
 		}
 		var reader = new FileReader();
 		reader.onerror = errorHandler;
 		reader.onabort = function(e) {
 			alert('Data file read cancelled');
 		};
 		reader.onload = function(e) {
 			var compressedArticles = e.target.result;
 			// TODO : should be improved by uncompressing the content chunk by chunk,
 			// until the length is reached, instead of uncompressing everything
 			var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
 			// Start reading at offset, and keep length characters
 			var htmlArticle = htmlArticles.substring(blockOffset, blockOffset + articleLength);
 			// Decode UTF-8 encoding
 			htmlArticle = decodeURIComponent(escape(htmlArticle));
 			callbackFunction (htmlArticle);
 		};
 		// TODO : should be improved by reading the file chunks by chunks until the article is found,
 		// instead of reading the whole file starting at blockstart
 		var blob = dataFile.slice(blockStart);
 		// Start the asynchronous read (the result is an ArrayBuffer, handled by onload above)
 		reader.readAsArrayBuffer(blob);
 	};
 	/**
 	 * Resolve the redirect of the given title instance, and call the callbackFunction with the redirected Title instance
 	 * 16 bytes are read (asynchronously) in the title file of this archive, at the index given by
 	 * title.blockStart, and fileNr, blockStart, blockOffset and articleLength are taken from them.
 	 * The given title instance is updated in place : the instance passed to the callback is the same object.
 	 * @param title Title instance of the redirect (blockStart may be a number or a string holding a number)
 	 * @param callbackFunction called with the updated Title instance
 	 */
 	LocalArchive.prototype.resolveRedirect = function(title, callbackFunction) {
 		var reader = new FileReader();
 		reader.onerror = errorHandler;
 		reader.onabort = function(e) {
 			alert('Title file read cancelled');
 		};
 		reader.onload = function(e) {
 			var byteArray = new Uint8Array(e.target.result);
 			var redirectedTitle = title;
 			// Layout used here : file number in byte 2, then three 4-bytes integers at 3, 7 and 11
 			redirectedTitle.fileNr = byteArray[2];
 			redirectedTitle.blockStart = readIntegerFrom4Bytes(byteArray,3);
 			redirectedTitle.blockOffset = readIntegerFrom4Bytes(byteArray,7);
 			redirectedTitle.articleLength = readIntegerFrom4Bytes(byteArray,11);
 			callbackFunction(redirectedTitle);
 		};
 		// Convert once : with a string, blockStart+16 would concatenate instead of adding
 		var redirectIndex = Number(title.blockStart);
 		// Read only the 16 necessary bytes, in the title file of this archive
 		// (there is no titleFile variable in this module, only the titleFile property)
 		var blob = this.titleFile.slice(redirectIndex,redirectIndex+16);
 		// Start the asynchronous read (the result is an ArrayBuffer, handled by onload above)
 		reader.readAsArrayBuffer(blob);
 	};
 	/**
@ -125,13 +381,57 @@ define(function(require) {
 		return utf8ByteArrayToString(encodedTitle, 15, len);
 	};
-	Title.prototype.toStringId = function(){
+	/**
-		return this.archive.language + "_" + this.archive.date + "_" + this.titleOffset;
+	 * Creates a title instance from a serialized id
 	 */
 	Title.parseTitleId = function(localArchive, titleId) {
 		// Expected format (the one written by toStringId) :
 		// language|date|fileNr|titleOffset|name|blockStart|blockOffset|articleLength
 		var title = new Title();
 		var idParts = titleId.split("|");
 		var partCount = idParts.length;
 		title.archive = localArchive;
 		// The numeric fields are converted back to numbers, so that the arithmetic
 		// done with them later never turns into string concatenation
 		title.fileNr = parseInt(idParts[2], 10);
 		title.titleOffset = parseInt(idParts[3], 10);
 		// The name is everything between the 4 leading and the 3 trailing fields :
 		// it is rebuilt with join in case it contains the separator itself
 		title.name = idParts.slice(4, partCount - 3).join("|");
 		title.blockStart = parseInt(idParts[partCount - 3], 10);
 		title.blockOffset = parseInt(idParts[partCount - 2], 10);
 		title.articleLength = parseInt(idParts[partCount - 1], 10);
 		return title;
 	};
 	/**
 	 * Serialize the title : language and date of its archive, then fileNr, titleOffset,
 	 * name, blockStart, blockOffset and articleLength, all separated by "|"
 	 * @returns {String}
 	 */
 	Title.prototype.toStringId = function(){
 		var archive = this.archive;
 		var id = archive.language + "|" + archive.date;
 		id += "|" + this.fileNr;
 		id += "|" + this.titleOffset;
 		id += "|" + this.name;
 		id += "|" + this.blockStart;
 		id += "|" + this.blockOffset;
 		id += "|" + this.articleLength;
 		return id;
 	};
 	/**
 	 * Serialize the title in a readable way (id, name, fileNr, blockStart, blockOffset
 	 * and articleLength, each one after its label, separated by spaces)
 	 * @returns {String}
 	 */
 	Title.prototype.toString = function(){
 		// The label of each field starts with a space, so that it does not stick to the previous value
 		return "title.id = " + this.toStringId() + " title.name = " + this.name + " title.fileNr = " + this.fileNr + " title.blockStart = " + this.blockStart + " title.blockOffset = " + this.blockOffset + " title.articleLength = " + this.articleLength;
 	};
 	/**
 	 * ErrorHandler for FileReader : alerts the user with a message chosen from the
 	 * error code of the event target. Aborted reads are not reported here.
 	 */
 	function errorHandler(evt) {
 		var fileError = evt.target.error;
 		var message = 'An error occurred reading this file.';
 		if (fileError.code === fileError.NOT_FOUND_ERR) {
 			message = 'File Not Found!';
 		}
 		else if (fileError.code === fileError.NOT_READABLE_ERR) {
 			message = 'File is not readable';
 		}
 		else if (fileError.code === fileError.ABORT_ERR) {
 			// noop
 			message = null;
 		}
 		if (message !== null) {
 			alert(message);
 		}
 	}
 	/**
 	 * Functions and classes exposed by this module
 	 */