On desktops, it works at least on recent Firefox and Chrome
+
On desktops, it works on recent Firefox and Chrome, and maybe on other browsers
On the Firefox OS simulator, you have (for now) to put the small dump files in a "fake-sdcard" folder of your firefox profile (ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/profile/fake-sdcard). It looks for wikipedia_small_2010-08-14/titles.idx in it. You also need to install the application from the dashboard of the simulator instead of accessing via the browser (due to security restrictions in Firefox OS : only certified webapps can access the sdcard)
I could not test it on a real Firefox OS device : if someone did, please let me know
- It's only a proof of concept sor far : there are certainly many many ways this could be enhanced (suggestions and patches are welcome : the source code is on github). In particular, the performance can be optimized when reading an article. I also know the links inside an article do not work very well for now.
+ It's only a proof of concept so far : there are many many ways this could be enhanced (suggestions and patches are welcome : the source code is on github). In particular, the performance can be optimized when reading an article. I also know the searches are not always very accurate, and the links inside an article do not work well for now.
Please select the file titles.idx : 0) {
- var filenumber = document.getElementById('filenumber').value;
- var blockstart = document.getElementById('blockstart').value;
- var blockoffset = document.getElementById('blockoffset').value;
- var length = document.getElementById('length').value;
- if (filenumber==255) {
- // It's a redirect : use redirected offsets
- filenumber = document.getElementById('redirectfilenumber').value;
- blockstart = document.getElementById('redirectblockstart').value;
- blockoffset = document.getElementById('redirectblockoffset').value;
- length = document.getElementById('redirectlength').value;
- if (!filenumber || filenumber=="") {
- // TODO : better handle this case
- alert("Redirect offsets not read yet");
- }
- }
- var dataFile = null;
- // Find the good dump file
- for (var i=0; i0) {
+ var title = evopedia.Title.parseTitleId(localArchive,titleId);
+ if (title.fileNr == 255) {
+ localArchive.resolveRedirect(title, readArticle);
}
else {
- prefixedFileNumber = filenumber;
+ readArticle(title);
}
- var expectedFileName = "wikipedia_"+prefixedFileNumber+".dat";
- // Check if the fileName ends with the expected file name (in case of DeviceStorage usage, the fileName is prefixed by the directory)
- if (fileName.match(expectedFileName+"$") == expectedFileName) {
- dataFile = dataFiles[i];
- }
- }
- if (!dataFile) {
- alert("File number " + filenumber + " not found");
- document.getElementById("articleContent").innerHTML="";
}
else {
- readArticleFromOffset(dataFile, blockstart, blockoffset, length);
+ alert("Data files not set");
}
}
- else {
- alert("Data files not set");
+
+ /**
+ * Read the article corresponding to the given title
+ * and hand the result over to displayArticleInForm.
+ * @param title a title, or an array of titles (in that case only the first element is used)
+ */
+ function readArticle(title) {
+ if ($.isArray(title)) { // an array may be given : keep only its first element
+ title = title[0];
+ }
+ localArchive.readArticle(title, displayArticleInForm); // displayArticleInForm is the callback given to the archive
}
-}
-
-/**
- * Read an article in a dump file, based on given offsets
- */
-function readArticleFromOffset(dataFile, blockstart, blockoffset, length) {
-
- var reader = new FileReader();
- reader.onerror = errorHandler;
- reader.onabort = function(e) {
- alert('Data file read cancelled');
- };
- reader.onload = function(e) {
- var compressedArticles = e.target.result;
- //var htmlArticle = ArchUtils.bz2.decode(compressedArticles);
- // TODO : should be improved by uncompressing the content chunk by chunk,
- // until the length is reached, instead of uncompressing everything
- var htmlArticles = bzip2.simple(bzip2.array(new Uint8Array(compressedArticles)));
- // Start reading at offset, and keep length characters
- var htmlArticle = htmlArticles.substring(blockoffset,blockoffset+length);
- // Keep only length characters
- htmlArticle = htmlArticle.substring(0,length);
- // Decode UTF-8 encoding
- htmlArticle = decodeURIComponent(escape(htmlArticle));
+ /**
+ * Display the given HTML article in the web page,
+ * and convert links to javascript calls
+ */
+ function displayArticleInForm(htmlArticle) {
// Display the article inside the web page.
$('#articleContent').html(htmlArticle);
-
+
// Convert links into javascript calls
$('#articleContent').find('a').each(function(){
- // Store current link's url
- var url = $(this).attr("href");
-
- if(url.slice(0, 1) == "#") {
- // It's an anchor link : do nothing
- }
- else if (url.substring(0,4) === "http") {
- // It's an external link : do nothing
- }
- else {
- // It's a link to another article : add an onclick event to go to this article
- // instead of following the link
- $(this).on('click', function(e) {
- goToArticle($(this).attr("href"));
- return false;
- });
- }
-
- });
- };
-
- // TODO : should be improved by reading the file chunks by chunks until the article is found,
- // instead of reading the whole file starting at blockstart
- var blob = dataFile.slice(blockstart);
-
- // Read in the image file as a binary string.
- reader.readAsArrayBuffer(blob);
-}
-
-function errorHandler(evt) {
- switch(evt.target.error.code) {
- case evt.target.error.NOT_FOUND_ERR:
- alert('File Not Found!');
- break;
- case evt.target.error.NOT_READABLE_ERR:
- alert('File is not readable');
- break;
- case evt.target.error.ABORT_ERR:
- break; // noop
- default:
- alert('An error occurred reading this file.');
- };
-}
-
-function handleDataFileSelect(evt) {
- dataFiles = evt.target.files;
-}
-
-function handleTitleFileSelect(evt) {
- titleFile = evt.target.files[0];
-}
-
-/**
- * Handle Enter key in the prefix input zone
- */
-function onKeyUpPrefix(evt) {
- if (evt.keyCode == 13) {
- document.getElementById("searchTitles").click();
+ // Store current link's url
+ var url = $(this).attr("href");
+
+ if(url.slice(0, 1) == "#") {
+ // It's an anchor link : do nothing
+ }
+ else if (url.substring(0,4) === "http") {
+ // It's an external link : do nothing
+ }
+ else {
+ // It's a link to another article : add an onclick event to go to this article
+ // instead of following the link
+ $(this).on('click', function(e) {
+ goToArticle($(this).attr("href"));
+ return false;
+ });
+ }
+ });
}
-}
-/**
- * Replace article content with the one of the given title
- */
-function goToArticle(title) {
- // This is awful and does not work very well.
- // It's just temporary before the algorithm is rewritten in an object-oriented way
- // TODO : rewrite this with a real article search and display
- searchTitlesFromPrefix(titleFile,title);
- updateOffsetsFromTitle($('#titleList').val());
- document.getElementById("articleContent").innerHTML="";
-}
+
+ /**
+ * Replace article content with the one of the given title
+ * @param title name of the article to look for (the link handler passes the href of the clicked link)
+ */
+ function goToArticle(title) {
+ $("#articleContent").html("Loading article from dump..."); // temporary message shown in the article zone
+ localArchive.getTitleByName(title, readArticle); // readArticle is called back with the Title found
+ }
});
diff --git a/www/js/lib/evopedia.js b/www/js/lib/evopedia.js
index 32332e16..983a3851 100644
--- a/www/js/lib/evopedia.js
+++ b/www/js/lib/evopedia.js
@@ -1,5 +1,9 @@
define(function(require) {
+ // Module dependencies
+ var remove_diacritics = require('remove_diacritics');
+ var bzip2 = require('bzip2');
+
/**
* Read an integer encoded in 4 bytes
*/
@@ -37,11 +41,263 @@ define(function(require) {
* It's still minimal for now. TODO : complete implementation to handle maths and coordinates
*/
function LocalArchive() {
- this.directory = null;
+ this.dataFiles = new Array(); // starts empty ; presumably filled by the caller with the dump files - TODO confirm
this.titleFile = null;
- this.date = null;
- this.language = null;
- }
+ // TODO to be replaced by the real archive attributes
+ this.date = "2013-03-14"; // hard-coded placeholder (see TODO above)
+ this.language = "zz"; // hard-coded placeholder (see TODO above)
+ };
+
+ /**
+ * This function is recursively called after each asynchronous read,
+ * so that to find the closest index in titleFile to the given prefix
+ * When found, call the callbackFunction with the index
+ * @param reader
+ * @param prefix
+ * @param lo
+ * @param hi
+ * @param callbackFunction
+ */
+ LocalArchive.prototype.recursivePrefixSearch = function(reader, prefix, lo, hi, callbackFunction) {
+ if (lo < hi-1 ) {
+ var mid = Math.round((lo+hi)/2);
+ // TODO : improve the way we read this file : 128 bytes is arbitrary and might be too small
+ var blob = this.titleFile.slice(mid,mid+128);
+ var currentLocalArchiveInstance = this;
+ reader.onload = function(e) {
+ var binaryTitleFile = e.target.result;
+ var byteArray = new Uint8Array(binaryTitleFile);
+ // Look for the index of the next NewLine
+ var newLineIndex=0;
+ while (newLineIndex=0) {
+ titleList[titleNumber] = title;
+ }
+ titleNumber++;
+ i=newLineIndex+1;
+ }
+ callbackFunction(titleList);
+ };
+ var blob = this.titleFile.slice(titleOffset);
+ // Read in the file as a binary string
+ reader.readAsArrayBuffer(blob);
+ };
+
+ /**
+ * Look for a title by its name, and call the callbackFunction with this Title
+ * The search is delegated to recursivePrefixSearch over the whole title file
+ * (from offset 0 to titleFile.size), then the title is read by getTitleAtOffset.
+ * NOTE(review) : unlike findTitlesWithPrefix, titleName is not passed through
+ * remove_diacritics.normalizeString before the search - confirm this is intended.
+ * @param titleName name of the title, used as the prefix of the search
+ * @param callbackFunction forwarded to getTitleAtOffset
+ */
+ LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) {
+ var titleFileSize = this.titleFile.size;
+ var reader = new FileReader();
+ reader.onerror = errorHandler;
+ reader.onabort = function(e) {
+ alert('Title file read cancelled');
+ };
+ var currentLocalArchiveInstance = this; // keep a reference to this archive for the callback below
+ this.recursivePrefixSearch(reader, titleName, 0, titleFileSize, function(titleOffset) {
+ currentLocalArchiveInstance.getTitleAtOffset(titleOffset, callbackFunction);
+ });
+ };
+
+ /**
+ * Get a random title, and call the callbackFunction with this Title
+ * Not implemented yet : the body is empty, so for now the callbackFunction is never called.
+ * @param callbackFunction currently unused
+ */
+ LocalArchive.prototype.getRandomTitle = function(callbackFunction) {
+ // TODO to be implemented
+ };
+
+ /**
+ * Find the 50 titles that start with the given prefix, and call the callbackFunction with this list of Titles
+ * The offset to start from is searched by recursivePrefixSearch over the whole title file
+ * (from offset 0 to titleFile.size), then 50 titles are requested from getTitlesStartingAtOffset.
+ * @param prefix if not empty, it is normalized with remove_diacritics.normalizeString before the search
+ * @param callbackFunction forwarded to getTitlesStartingAtOffset
+ */
+ LocalArchive.prototype.findTitlesWithPrefix = function(prefix, callbackFunction) {
+ var titleFileSize = this.titleFile.size;
+ if (prefix) { // an empty or missing prefix is passed to the search as is
+ prefix = remove_diacritics.normalizeString(prefix);
+ }
+
+ var reader = new FileReader();
+ reader.onerror = errorHandler;
+ reader.onabort = function(e) {
+ alert('Title file read cancelled');
+ };
+ var currentLocalArchiveInstance = this; // keep a reference to this archive for the callback below
+ this.recursivePrefixSearch(reader, prefix, 0, titleFileSize, function(titleOffset) {
+ currentLocalArchiveInstance.getTitlesStartingAtOffset(titleOffset, 50, callbackFunction);
+ });
+ };
+
+ /**
+ * Read an article from the title instance, and call the callbackFunction with the article HTML String
+ * @param title
+ * @param callbackFunction
+ */
+ LocalArchive.prototype.readArticle = function(title, callbackFunction) {
+ var dataFile = null;
+
+ var prefixedFileNumber = "";
+ if (title.fileNr<10) {
+ prefixedFileNumber = "0" + title.fileNr;
+ }
+ else {
+ prefixedFileNumber = title.fileNr;
+ }
+ var expectedFileName = "wikipedia_"+prefixedFileNumber+".dat";
+
+ // Find the good dump file
+ for (var i=0; i