From 9e9d9ea45b3ad16ece16cf76e82605473a1e8ae2 Mon Sep 17 00:00:00 2001 From: mossroy Date: Fri, 21 Mar 2014 13:46:57 +0100 Subject: [PATCH] Prefix private variables with an underscore. Fixes #72 --- tests/tests.js | 814 +++++++-------- www/js/app.js | 18 +- www/js/lib/archive.js | 1894 +++++++++++++++++----------------- www/js/lib/title.js | 299 +++--- www/js/lib/titleIterators.js | 254 ++--- 5 files changed, 1639 insertions(+), 1640 deletions(-) diff --git a/tests/tests.js b/tests/tests.js index c46fc422..e84f2d99 100644 --- a/tests/tests.js +++ b/tests/tests.js @@ -1,407 +1,407 @@ -/** - * tests.js : Unit tests implemented with qunit - * - * Copyright 2013-2014 Mossroy and contributors - * License GPL v3: - * - * This file is part of Evopedia. - * - * Evopedia is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Evopedia is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Evopedia (file LICENSE-GPLv3.txt). If not, see - */ -define(function(require) { - - var $ = require('jquery'); - var evopediaTitle = require('title'); - var evopediaArchive = require('archive'); - var geometry = require('geometry'); - var util = require('util'); - - // Due to security restrictions in the browsers, - // we can not read directly the files and run the unit tests - // The user has to select them manually, then launch the tests - $('#runTests').on('click', function(e) { - runTests(); - }); - - var runTests = function() { - - module("environment"); - test("qunit test", function() { - equal("test", "test", "QUnit is properly configured"); - }); - - test("check archive files are selected", function() { - var archiveFiles = document.getElementById('archiveFiles').files; - ok(archiveFiles && archiveFiles[0] && archiveFiles[0].size > 0, "First archive file set and not empty"); - ok(archiveFiles.length >= 5, "At least 5 files are selected"); - }); - - // Create a localArchive from selected files, in order to run the following tests - var localArchive = new evopediaArchive.LocalArchive(); - localArchive.initializeFromArchiveFiles(document.getElementById('archiveFiles').files); - - module("evopedia_title_search_and_read"); - asyncTest("check getTitlesStartingAtOffset 0", function() { - expect(4); - var callbackFunction = function(titleList) { - equal(titleList.length, 4, "4 titles found, as requested"); - var indexAbraham = -1; - for (var i = 0; i < titleList.length; i++) { - if (titleList[i] && titleList[i].name === "Abraham") { - indexAbraham = i; - } - } - ok(indexAbraham > -1, "Title 'Abraham' found"); - var firstTitleName = "not found"; - var secondTitleName = "not found"; - if (titleList.length >= 1 && titleList[0]) { - firstTitleName = titleList[0].name; - } - if (titleList.length >= 2 && titleList[1]) { - secondTitleName = titleList[1].name; - } - equal(firstTitleName, "Abbasid_Caliphate", "First article name is 'Abbasid_Caliphate'"); - equal(secondTitleName, "Abortion", "Second article name is 'Abortion'"); - start(); - }; - localArchive.getTitlesStartingAtOffset(0, 4, callbackFunction); - }); - - asyncTest("check findTitlesWithPrefix Am", function() { - expect(4); - var callbackFunction = function(titleList) { - ok(titleList && titleList.length > 0, "At least one title is found"); - var firstTitleName = "not found"; - var secondTitleName = "not found"; - if (titleList.length >= 1 && titleList[0]) { - firstTitleName = titleList[0].name; - } - if (titleList.length >= 2 && titleList[1]) { - secondTitleName = titleList[1].name; - } - equal(firstTitleName, "Amazon_River", "First article name is 'Amazon_River'"); - equal(secondTitleName, "American_Civil_War", "Second article name is 'American_Civil_War'"); - equal(titleList.length, 4, "4 titles should be found"); - start(); - }; - localArchive.findTitlesWithPrefix("Am", 10, callbackFunction); - }); - - // Create a title instance for the Article 'Abraham' - var titleAbraham = new evopediaTitle.Title(); - titleAbraham.archive = localArchive; - titleAbraham.articleLength = 10071; - titleAbraham.blockOffset = 127640; - titleAbraham.blockStart = 2364940; - titleAbraham.fileNr = 0; - titleAbraham.name = "Abraham"; - titleAbraham.titleOffset = 57; - - asyncTest("check getTitleByName with accents : Diego Velázquez", function() { - expect(2); - var callbackFunction = function(title) { - ok(title !== null, "Title found"); - equal(title.name, "Diego_Velázquez", "Name of the title is correct"); - start(); - }; - localArchive.getTitleByName("Diego_Velázquez", callbackFunction); - }); - asyncTest("check getTitleByName with quote : Hundred Years' War", function() { - expect(2); - var callbackFunction = function(title) { - ok(title !== null, "Title found"); - equal(title.name, "Hundred_Years'_War", "Name of the title is correct"); - start(); - }; - localArchive.getTitleByName("Hundred_Years'_War", callbackFunction); - }); - - test("check parseTitleFromId", function() { - var titleId = "small|2010-08-14|0|57|Abraham|2364940|127640|10071"; - var title = evopediaTitle.Title.parseTitleId(localArchive, titleId); - ok(title, "Title instance created"); - deepEqual(title, titleAbraham, "Parsing from titleId gives Abraham title"); - }); - - asyncTest("check readArticle", function() { - expect(3); - var callbackFunction = function(title, htmlArticle) { - ok(htmlArticle && htmlArticle.length > 0, "Article not empty"); - // Remove new lines - htmlArticle = htmlArticle.replace(/[\r\n]/g, " "); - ok(htmlArticle.match("^[ \t]*]*>Abraham"), "'Abraham' title at the beginning"); - ok(htmlArticle.match("[ \t]$"), " at the end"); - start(); - }; - localArchive.readArticle(titleAbraham, callbackFunction); - }); - - asyncTest("check getTitleByName and readArticle with escape bytes", function() { - expect(5); - var callbackArticleRead = function(title, htmlArticle) { - ok(htmlArticle && htmlArticle.length > 0, "Article not empty"); - // Remove new lines - htmlArticle = htmlArticle.replace(/[\r\n]/g, " "); - ok(htmlArticle.match("^[ \t]*]*>AIDS"), "'AIDS' title at the beginning"); - ok(htmlArticle.match("[ \t]$"), " at the end"); - start(); - }; - var callbackTitleFound = function(title) { - ok(title !== null, "Title found"); - equal(title.name, "AIDS", "Name of the title is correct"); - localArchive.readArticle(title, callbackArticleRead); - }; - localArchive.getTitleByName("AIDS", callbackTitleFound); - }); - - asyncTest("check getTitleByName with a title name that does not exist in the archive", function() { - expect(1); - var callbackTitleFound = function(title) { - ok(title === null, "No title found because it does not exist in the archive"); - start(); - }; - localArchive.getTitleByName("abcdef", callbackTitleFound); - }); - - asyncTest("check loading a math image", function() { - expect(2); - var callbackFunction = function(data) { - ok(data && data.length > 0, "Image not empty"); - // edb3069b82c68d270f6642c171cc6293.png should give a "1 1/2" formula (can be found in "Rational_number" article) - equal(data, - "iVBORw0KGgoAAAANSUhEUgAAABUAAAApBAMAAAAogX9zAAAAMFBMVEX///8AAADm5uZAQEDMzMwWFhYiIiIwMDBQUFCenp62trZiYmIMDAwEBASKiop0dHRvDVFEAAAAb0lEQVQY02NggAAmAwY4cE2AM9VNEWwG9oFhcxgKN9HJhYyCQCBApgs5jYMVYCKrGdgOwNgGDCzSMLYwA4MYjH2cgeEawjgWCQSbQwjBdpyAYMch2f4Awd7HwAVj8n1g4Iaxl+7e3Q1jXxQUlGMAAJkfGS29Qu04AAAAAElFTkSuQmCC", - "Math image corresponds to '1 1/2' png"); - start(); - }; - - localArchive.loadMathImage("edb3069b82c68d270f6642c171cc6293", callbackFunction); - }); - - module("geometry"); - test("check rectangle intersection", function() { - var rect1 = new geometry.rect(0,0,2,2); - var rect2 = new geometry.rect(1,1,2,2); - var rect3 = new geometry.rect(2,2,2,2); - var rect4 = new geometry.rect(1,1,1,1); - var rect5 = new geometry.rect(3,3,2,2); - var rect6 = new geometry.rect(2,0,1,10); - ok(rect1.intersect(rect2), "rect1 intersects rect2"); - ok(rect2.intersect(rect1), "rect2 intersects rect1"); - ok(rect2.intersect(rect3), "rect1 intersects rect3"); - ok(!rect1.intersect(rect3), "rect1 does not intersect rect3"); - ok(!rect4.intersect(rect3), "rect4 does not intersect rect3"); - ok(rect4.intersect(rect2), "rect4 intersects rect2"); - ok(!rect5.intersect(rect1), "rect5 does not intersect rect1"); - ok(!rect1.intersect(rect5), "rect1 does not intersect rect5"); - ok(rect6.intersect(rect2), "rect6 intersects rect2"); - ok(rect6.intersect(rect3), "rect6 intersects rect3"); - ok(!rect6.intersect(rect5), "rect6 intersects rect5"); - var rect7 = new geometry.rect(0,0,45.5,90,5); - var rect8 = new geometry.rect(0,40,10,10); - ok(rect8.intersect(rect7), "rect8 intersects rect7"); - }); - test("check rectangle contains a point", function() { - var rect1 = new geometry.rect(2,3,4,5); - var point1 = new geometry.point(1,1); - var point2 = new geometry.point(2,3); - var point3 = new geometry.point(4,4); - var point4 = new geometry.point(7,9); - var point5 = new geometry.point(4,6); - ok(!rect1.containsPoint(point1), "rect1 does not contain point1"); - ok(!rect1.containsPoint(point2), "rect1 does not contain point2"); - ok(rect1.containsPoint(point3), "rect1 contains point3"); - ok(!rect1.containsPoint(point4), "rect1 does not contain point4"); - ok(rect1.containsPoint(point5), "rect1 contains point5"); - }); - test("check normalization of a rectangle", function() { - var rect1 = new geometry.rect(2,3,4,5); - var normalizedRect1 = rect1.normalized(); - ok(rect1.x===normalizedRect1.x - && rect1.y===normalizedRect1.y - && rect1.width===normalizedRect1.width - && rect1.height===normalizedRect1.height, "rect1 is the same after normalization"); - var rect2 = new geometry.rect(6,3,-4,5); - var normalizedRect2 = rect2.normalized(); - //alert("normalizedRect2 = " + normalizedRect2); - ok(normalizedRect2.x===2 - && normalizedRect2.y===3 - && normalizedRect2.width===4 - && normalizedRect2.height===5, "rect2 successfully normalized by switching top left and top right corners"); - var rect3 = new geometry.rect(2,8,4,-5); - var normalizedRect3 = rect3.normalized(); - ok(normalizedRect3.x===2 - && normalizedRect3.y===3 - && normalizedRect3.width===4 - && normalizedRect3.height===5, "rect3 successfully normalized by switching top left and botton left corners"); - var rect4 = new geometry.rect(6,8,-4,-5); - var normalizedRect4 = rect4.normalized(); - ok(normalizedRect4.x===2 - && normalizedRect4.y===3 - && normalizedRect4.width===4 - && normalizedRect4.height===5, "rect4 successfully normalized by switching bottom right and top left corners"); - var rect5 = new geometry.rect(12,2,-4,-1); - var normalizedRect5 = rect5.normalized(); - ok(normalizedRect5.x===8 - && normalizedRect5.y===1 - && normalizedRect5.width===4 - && normalizedRect5.height===1, "rect5 successfully normalized by switching bottom right and top left corners"); - }); - test("check rectangle constructor from top-left and bottom-right points", function() { - var topLeft = new geometry.point(2,5); - var bottomRight = new geometry.point(5,3); - var rect = new geometry.rect(topLeft, bottomRight); - equal(rect.x, 2 , "rect.x should be 2"); - equal(rect.y, 3 , "rect.y should be 3"); - equal(rect.width, 3 , "rect.width should be 3"); - equal(rect.height, 2 , "rect.height should be 2"); - }); - test("check rectangle contains another rectangle", function() { - var rect1 = new geometry.rect(2,3,4,4); - var rect2 = new geometry.rect(3,4,1,1); - var rect3 = new geometry.rect(1,1,1,1); - var rect4 = new geometry.rect(3,1,2,4); - var rect5 = new geometry.rect(3,1,6,4); - var rect6 = new geometry.rect(2,3,3,2); - var rect7 = new geometry.rect(5,6,-3,-2); // same as rect7 but not normalized - ok(rect1.contains(rect2), "rect1 should contain rect2"); - ok(!rect2.contains(rect1), "rect2 should not contain rect1"); - ok(!rect1.contains(rect3), "rect1 should not contain rect3"); - ok(!rect1.contains(rect4), "rect1 should not contain rect4"); - ok(!rect1.contains(rect5), "rect1 should not contain rect5"); - ok(rect1.contains(rect1), "rect1 should contain rect1"); - ok(rect1.contains(rect6), "rect1 should contain rect6"); - ok(rect1.contains(rect7), "rect1 should contain rect7"); - }); - - module("utils"); - test("check reading an IEEE_754 float from 4 bytes" ,function() { - var byteArray = new Uint8Array(4); - // This example is taken from https://fr.wikipedia.org/wiki/IEEE_754#Un_exemple_plus_complexe - // 1100 0010 1110 1101 0100 0000 0000 0000 - byteArray[0] = 194; - byteArray[1] = 237; - byteArray[2] = 64; - byteArray[3] = 0; - var float = util.readFloatFrom4Bytes(byteArray, 0); - equal(float, -118.625, "the IEEE_754 float should be converted as -118.625"); - }); - - module("evopedia_articles_nearby"); - asyncTest("check articles found nearby France and Germany", function() { - expect(5); - var callbackTitlesNearbyFound = function(titleList) { - ok(titleList !== null, "Some titles should be found"); - equal(titleList.length, 3, "3 titles should be found"); - var titleDanube = null; - var titleParis = null; - var titleAlps = null; - for (var i=0; i + */ +define(function(require) { + + var $ = require('jquery'); + var evopediaTitle = require('title'); + var evopediaArchive = require('archive'); + var geometry = require('geometry'); + var util = require('util'); + + // Due to security restrictions in the browsers, + // we can not read directly the files and run the unit tests + // The user has to select them manually, then launch the tests + $('#runTests').on('click', function(e) { + runTests(); + }); + + var runTests = function() { + + module("environment"); + test("qunit test", function() { + equal("test", "test", "QUnit is properly configured"); + }); + + test("check archive files are selected", function() { + var archiveFiles = document.getElementById('archiveFiles').files; + ok(archiveFiles && archiveFiles[0] && archiveFiles[0].size > 0, "First archive file set and not empty"); + ok(archiveFiles.length >= 5, "At least 5 files are selected"); + }); + + // Create a localArchive from selected files, in order to run the following tests + var localArchive = new evopediaArchive.LocalArchive(); + localArchive.initializeFromArchiveFiles(document.getElementById('archiveFiles').files); + + module("evopedia_title_search_and_read"); + asyncTest("check getTitlesStartingAtOffset 0", function() { + expect(4); + var callbackFunction = function(titleList) { + equal(titleList.length, 4, "4 titles found, as requested"); + var indexAbraham = -1; + for (var i = 0; i < titleList.length; i++) { + if (titleList[i] && titleList[i]._name === "Abraham") { + indexAbraham = i; + } + } + ok(indexAbraham > -1, "Title 'Abraham' found"); + var firstTitleName = "not found"; + var secondTitleName = "not found"; + if (titleList.length >= 1 && titleList[0]) { + firstTitleName = titleList[0]._name; + } + if (titleList.length >= 2 && titleList[1]) { + secondTitleName = titleList[1]._name; + } + equal(firstTitleName, "Abbasid_Caliphate", "First article name is 'Abbasid_Caliphate'"); + equal(secondTitleName, "Abortion", "Second article name is 'Abortion'"); + start(); + }; + localArchive.getTitlesStartingAtOffset(0, 4, callbackFunction); + }); + + asyncTest("check findTitlesWithPrefix Am", function() { + expect(4); + var callbackFunction = function(titleList) { + ok(titleList && titleList.length > 0, "At least one title is found"); + var firstTitleName = "not found"; + var secondTitleName = "not found"; + if (titleList.length >= 1 && titleList[0]) { + firstTitleName = titleList[0]._name; + } + if (titleList.length >= 2 && titleList[1]) { + secondTitleName = titleList[1]._name; + } + equal(firstTitleName, "Amazon_River", "First article name is 'Amazon_River'"); + equal(secondTitleName, "American_Civil_War", "Second article name is 'American_Civil_War'"); + equal(titleList.length, 4, "4 titles should be found"); + start(); + }; + localArchive.findTitlesWithPrefix("Am", 10, callbackFunction); + }); + + // Create a title instance for the Article 'Abraham' + var titleAbraham = new evopediaTitle.Title(); + titleAbraham._archive = localArchive; + titleAbraham._articleLength = 10071; + titleAbraham._blockOffset = 127640; + titleAbraham._blockStart = 2364940; + titleAbraham._fileNr = 0; + titleAbraham._name = "Abraham"; + titleAbraham._titleOffset = 57; + + asyncTest("check getTitleByName with accents : Diego Velázquez", function() { + expect(2); + var callbackFunction = function(title) { + ok(title !== null, "Title found"); + equal(title._name, "Diego_Velázquez", "Name of the title is correct"); + start(); + }; + localArchive.getTitleByName("Diego_Velázquez", callbackFunction); + }); + asyncTest("check getTitleByName with quote : Hundred Years' War", function() { + expect(2); + var callbackFunction = function(title) { + ok(title !== null, "Title found"); + equal(title._name, "Hundred_Years'_War", "Name of the title is correct"); + start(); + }; + localArchive.getTitleByName("Hundred_Years'_War", callbackFunction); + }); + + test("check parseTitleFromId", function() { + var titleId = "small|2010-08-14|0|57|Abraham|2364940|127640|10071"; + var title = evopediaTitle.Title.parseTitleId(localArchive, titleId); + ok(title, "Title instance created"); + deepEqual(title, titleAbraham, "Parsing from titleId gives Abraham title"); + }); + + asyncTest("check readArticle", function() { + expect(3); + var callbackFunction = function(title, htmlArticle) { + ok(htmlArticle && htmlArticle.length > 0, "Article not empty"); + // Remove new lines + htmlArticle = htmlArticle.replace(/[\r\n]/g, " "); + ok(htmlArticle.match("^[ \t]*]*>Abraham"), "'Abraham' title at the beginning"); + ok(htmlArticle.match("[ \t]$"), " at the end"); + start(); + }; + localArchive.readArticle(titleAbraham, callbackFunction); + }); + + asyncTest("check getTitleByName and readArticle with escape bytes", function() { + expect(5); + var callbackArticleRead = function(title, htmlArticle) { + ok(htmlArticle && htmlArticle.length > 0, "Article not empty"); + // Remove new lines + htmlArticle = htmlArticle.replace(/[\r\n]/g, " "); + ok(htmlArticle.match("^[ \t]*]*>AIDS"), "'AIDS' title at the beginning"); + ok(htmlArticle.match("[ \t]$"), " at the end"); + start(); + }; + var callbackTitleFound = function(title) { + ok(title !== null, "Title found"); + equal(title._name, "AIDS", "Name of the title is correct"); + localArchive.readArticle(title, callbackArticleRead); + }; + localArchive.getTitleByName("AIDS", callbackTitleFound); + }); + + asyncTest("check getTitleByName with a title name that does not exist in the archive", function() { + expect(1); + var callbackTitleFound = function(title) { + ok(title === null, "No title found because it does not exist in the archive"); + start(); + }; + localArchive.getTitleByName("abcdef", callbackTitleFound); + }); + + asyncTest("check loading a math image", function() { + expect(2); + var callbackFunction = function(data) { + ok(data && data.length > 0, "Image not empty"); + // edb3069b82c68d270f6642c171cc6293.png should give a "1 1/2" formula (can be found in "Rational_number" article) + equal(data, + "iVBORw0KGgoAAAANSUhEUgAAABUAAAApBAMAAAAogX9zAAAAMFBMVEX///8AAADm5uZAQEDMzMwWFhYiIiIwMDBQUFCenp62trZiYmIMDAwEBASKiop0dHRvDVFEAAAAb0lEQVQY02NggAAmAwY4cE2AM9VNEWwG9oFhcxgKN9HJhYyCQCBApgs5jYMVYCKrGdgOwNgGDCzSMLYwA4MYjH2cgeEawjgWCQSbQwjBdpyAYMch2f4Awd7HwAVj8n1g4Iaxl+7e3Q1jXxQUlGMAAJkfGS29Qu04AAAAAElFTkSuQmCC", + "Math image corresponds to '1 1/2' png"); + start(); + }; + + localArchive.loadMathImage("edb3069b82c68d270f6642c171cc6293", callbackFunction); + }); + + module("geometry"); + test("check rectangle intersection", function() { + var rect1 = new geometry.rect(0,0,2,2); + var rect2 = new geometry.rect(1,1,2,2); + var rect3 = new geometry.rect(2,2,2,2); + var rect4 = new geometry.rect(1,1,1,1); + var rect5 = new geometry.rect(3,3,2,2); + var rect6 = new geometry.rect(2,0,1,10); + ok(rect1.intersect(rect2), "rect1 intersects rect2"); + ok(rect2.intersect(rect1), "rect2 intersects rect1"); + ok(rect2.intersect(rect3), "rect1 intersects rect3"); + ok(!rect1.intersect(rect3), "rect1 does not intersect rect3"); + ok(!rect4.intersect(rect3), "rect4 does not intersect rect3"); + ok(rect4.intersect(rect2), "rect4 intersects rect2"); + ok(!rect5.intersect(rect1), "rect5 does not intersect rect1"); + ok(!rect1.intersect(rect5), "rect1 does not intersect rect5"); + ok(rect6.intersect(rect2), "rect6 intersects rect2"); + ok(rect6.intersect(rect3), "rect6 intersects rect3"); + ok(!rect6.intersect(rect5), "rect6 intersects rect5"); + var rect7 = new geometry.rect(0,0,45.5,90,5); + var rect8 = new geometry.rect(0,40,10,10); + ok(rect8.intersect(rect7), "rect8 intersects rect7"); + }); + test("check rectangle contains a point", function() { + var rect1 = new geometry.rect(2,3,4,5); + var point1 = new geometry.point(1,1); + var point2 = new geometry.point(2,3); + var point3 = new geometry.point(4,4); + var point4 = new geometry.point(7,9); + var point5 = new geometry.point(4,6); + ok(!rect1.containsPoint(point1), "rect1 does not contain point1"); + ok(!rect1.containsPoint(point2), "rect1 does not contain point2"); + ok(rect1.containsPoint(point3), "rect1 contains point3"); + ok(!rect1.containsPoint(point4), "rect1 does not contain point4"); + ok(rect1.containsPoint(point5), "rect1 contains point5"); + }); + test("check normalization of a rectangle", function() { + var rect1 = new geometry.rect(2,3,4,5); + var normalizedRect1 = rect1.normalized(); + ok(rect1.x===normalizedRect1.x + && rect1.y===normalizedRect1.y + && rect1.width===normalizedRect1.width + && rect1.height===normalizedRect1.height, "rect1 is the same after normalization"); + var rect2 = new geometry.rect(6,3,-4,5); + var normalizedRect2 = rect2.normalized(); + //alert("normalizedRect2 = " + normalizedRect2); + ok(normalizedRect2.x===2 + && normalizedRect2.y===3 + && normalizedRect2.width===4 + && normalizedRect2.height===5, "rect2 successfully normalized by switching top left and top right corners"); + var rect3 = new geometry.rect(2,8,4,-5); + var normalizedRect3 = rect3.normalized(); + ok(normalizedRect3.x===2 + && normalizedRect3.y===3 + && normalizedRect3.width===4 + && normalizedRect3.height===5, "rect3 successfully normalized by switching top left and botton left corners"); + var rect4 = new geometry.rect(6,8,-4,-5); + var normalizedRect4 = rect4.normalized(); + ok(normalizedRect4.x===2 + && normalizedRect4.y===3 + && normalizedRect4.width===4 + && normalizedRect4.height===5, "rect4 successfully normalized by switching bottom right and top left corners"); + var rect5 = new geometry.rect(12,2,-4,-1); + var normalizedRect5 = rect5.normalized(); + ok(normalizedRect5.x===8 + && normalizedRect5.y===1 + && normalizedRect5.width===4 + && normalizedRect5.height===1, "rect5 successfully normalized by switching bottom right and top left corners"); + }); + test("check rectangle constructor from top-left and bottom-right points", function() { + var topLeft = new geometry.point(2,5); + var bottomRight = new geometry.point(5,3); + var rect = new geometry.rect(topLeft, bottomRight); + equal(rect.x, 2 , "rect.x should be 2"); + equal(rect.y, 3 , "rect.y should be 3"); + equal(rect.width, 3 , "rect.width should be 3"); + equal(rect.height, 2 , "rect.height should be 2"); + }); + test("check rectangle contains another rectangle", function() { + var rect1 = new geometry.rect(2,3,4,4); + var rect2 = new geometry.rect(3,4,1,1); + var rect3 = new geometry.rect(1,1,1,1); + var rect4 = new geometry.rect(3,1,2,4); + var rect5 = new geometry.rect(3,1,6,4); + var rect6 = new geometry.rect(2,3,3,2); + var rect7 = new geometry.rect(5,6,-3,-2); // same as rect7 but not normalized + ok(rect1.contains(rect2), "rect1 should contain rect2"); + ok(!rect2.contains(rect1), "rect2 should not contain rect1"); + ok(!rect1.contains(rect3), "rect1 should not contain rect3"); + ok(!rect1.contains(rect4), "rect1 should not contain rect4"); + ok(!rect1.contains(rect5), "rect1 should not contain rect5"); + ok(rect1.contains(rect1), "rect1 should contain rect1"); + ok(rect1.contains(rect6), "rect1 should contain rect6"); + ok(rect1.contains(rect7), "rect1 should contain rect7"); + }); + + module("utils"); + test("check reading an IEEE_754 float from 4 bytes" ,function() { + var byteArray = new Uint8Array(4); + // This example is taken from https://fr.wikipedia.org/wiki/IEEE_754#Un_exemple_plus_complexe + // 1100 0010 1110 1101 0100 0000 0000 0000 + byteArray[0] = 194; + byteArray[1] = 237; + byteArray[2] = 64; + byteArray[3] = 0; + var float = util.readFloatFrom4Bytes(byteArray, 0); + equal(float, -118.625, "the IEEE_754 float should be converted as -118.625"); + }); + + module("evopedia_articles_nearby"); + asyncTest("check articles found nearby France and Germany", function() { + expect(5); + var callbackTitlesNearbyFound = function(titleList) { + ok(titleList !== null, "Some titles should be found"); + equal(titleList.length, 3, "3 titles should be found"); + var titleDanube = null; + var titleParis = null; + var titleAlps = null; + for (var i=0; i 0) { + if (localArchive._dataFiles && localArchive._dataFiles.length > 0) { var title = evopediaTitle.Title.parseTitleId(localArchive, titleId); - $("#articleName").html(title.name); + $("#articleName").html(title._name); $("#readingArticle").show(); $("#articleContent").html(""); - if (title.fileNr === 255) { + if (title._fileNr === 255) { localArchive.resolveRedirect(title, readArticle); } else { @@ -438,7 +438,7 @@ define(function(require) { * @param {type} title */ function readArticle(title) { - if (title.fileNr === 255) { + if (title._fileNr === 255) { localArchive.resolveRedirect(title, readArticle); } else { @@ -571,7 +571,7 @@ define(function(require) { $('#configuration').hide(); $('#titleList').hide(); $('#articleContent').empty(); - if (localArchive !== null && localArchive.titleFile !== null) { + if (localArchive !== null && localArchive._titleFile !== null) { var longitude = $('#longitude').val(); var latitude = $('#latitude').val(); var maxDistance = $('#maxDistance').val(); @@ -659,7 +659,7 @@ define(function(require) { alert("Error finding random article."); } else { - $("#articleName").html(title.name); + $("#articleName").html(title._name); $("#readingArticle").show(); $("#articleContent").html(""); readArticle(title); diff --git a/www/js/lib/archive.js b/www/js/lib/archive.js index bf441b9f..21345d78 100644 --- a/www/js/lib/archive.js +++ b/www/js/lib/archive.js @@ -1,947 +1,947 @@ -/** - * archive.js : Class for a local Evopedia archive, with the algorithms to read it - * This file handles finding a title in an archive, reading an article in an archive etc - * - * Copyright 2013-2014 Mossroy and contributors - * License GPL v3: - * - * This file is part of Evopedia. - * - * Evopedia is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Evopedia is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Evopedia (file LICENSE-GPLv3.txt). If not, see - */ -define(function(require) { - - // Module dependencies - var normalize_string = require('normalize_string'); - var util = require('util'); - var geometry = require('geometry'); - var jQuery = require('jquery'); - var titleIterators = require('titleIterators'); - - // Declare the webworker that can uncompress with bzip2 algorithm - var webworkerBzip2; - try { - // When using the application normally - webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js"); - } - catch(e) { - // When using unit tests - webworkerBzip2 = new Worker("www/js/lib/webworker_bzip2.js"); - } - - // Size of chunks read in the dump files : 128 KB - var CHUNK_SIZE = 131072; - // A rectangle representing all the earth globe - var GLOBE_RECTANGLE = new geometry.rect(-91, -181, 182, 362); - - /** - * LocalArchive class : defines a wikipedia dump on the filesystem - */ - function LocalArchive() { - this.dataFiles = new Array(); - this.coordinateFiles = new Array(); - this.titleFile = null; - this.mathIndexFile = null; - this.mathDataFile = null; - this.date = null; - this.language = null; - this.titleSearchFile = null; - this.normalizedTitles = true; - }; - - - /** - * Read the title Files in the given directory, and assign them to the - * current LocalArchive - * - * @param storage - * @param directory - */ - LocalArchive.prototype.readTitleFilesFromStorage = function(storage, directory) { - var currentLocalArchiveInstance = this; - storage.get(directory + 'titles.idx').then(function(file) { - currentLocalArchiveInstance.titleFile = file; - }, function(error) { - alert("Error reading title file in directory " + directory + " : " + error); - }); - storage.get(directory + 'titles_search.idx').then(function(file) { - currentLocalArchiveInstance.titleSearchFile = file; - }, function(error) { - // Do nothing : this file is not mandatory in an archive - }); - }; - - /** - * Read the data Files in the given directory (starting at given index), and - * assign them to the current LocalArchive - * - * @param storage - * @param directory - * @param index - */ - LocalArchive.prototype.readDataFilesFromStorage = function(storage, directory, index) { - var currentLocalArchiveInstance = this; - - var prefixedFileNumber = ""; - if (index < 10) { - prefixedFileNumber = "0" + index; - } else { - prefixedFileNumber = index; - } - storage.get(directory + 'wikipedia_' + prefixedFileNumber + '.dat') - .then(function(file) { - currentLocalArchiveInstance.dataFiles[index] = file; - currentLocalArchiveInstance.readDataFilesFromStorage(storage, directory, - index + 1); - }, function(error) { - // TODO there must be a better way to detect a FileNotFound - if (error != "NotFoundError") { - alert("Error reading data file " + index + " in directory " - + directory + " : " + error); - } - }); - }; - - /** - * Read the coordinate Files in the given directory (starting at given index), and - * assign them to the current LocalArchive - * - * @param storage - * @param directory - * @param index - */ - LocalArchive.prototype.readCoordinateFilesFromStorage = function(storage, directory, index) { - var currentLocalArchiveInstance = this; - - var prefixedFileNumber = ""; - if (index < 10) { - prefixedFileNumber = "0" + index; - } else { - prefixedFileNumber = index; - } - storage.get(directory + 'coordinates_' + prefixedFileNumber - + '.idx').then(function(file) { - currentLocalArchiveInstance.coordinateFiles[index - 1] = file; - currentLocalArchiveInstance.readCoordinateFilesFromStorage(storage, directory, - index + 1); - }, function(error) { - // TODO there must be a better way to detect a FileNotFound - if (error != "NotFoundError") { - alert("Error reading coordinates file " + index + " in directory " - + directory + " : " + error); - } - }); - }; - - /** - * Read the metadata.txt file in the given directory, and store its content - * in the current instance - * - * @param storage - * @param directory - */ - LocalArchive.prototype.readMetadataFileFromStorage = function(storage, directory) { - var currentLocalArchiveInstance = this; - - storage.get(directory + 'metadata.txt').then(function(file) { - var metadataFile = file; - currentLocalArchiveInstance.readMetadataFile(metadataFile); - }, function(error) { - alert("Error reading metadata.txt file in directory " - + directory + " : " + error); - }); - }; - - /** - * Read the metadata file, in order to populate its values in the current - * instance - * @param {File} file metadata.txt file - */ - LocalArchive.prototype.readMetadataFile = function(file) { - var currentLocalArchiveInstance = this; - var reader = new FileReader(); - reader.onload = function(e) { - var metadata = e.target.result; - currentLocalArchiveInstance.language = /\nlanguage ?\= ?([^ \n]+)/.exec(metadata)[1]; - currentLocalArchiveInstance.date = /\ndate ?\= ?([^ \n]+)/.exec(metadata)[1]; - var normalizedTitlesRegex = /\nnormalized_titles ?\= ?([^ \n]+)/; - if (normalizedTitlesRegex.exec(metadata)) { - var normalizedTitlesInt = normalizedTitlesRegex.exec(metadata)[1]; - if (normalizedTitlesInt === "0") { - currentLocalArchiveInstance.normalizedTitles = false; - } - else { - currentLocalArchiveInstance.normalizedTitles = true; - } - } - else { - currentLocalArchiveInstance.normalizedTitles = true; - } - }; - reader.readAsText(file); - }; - - /** - * Initialize the localArchive from given archive files - * @param {type} archiveFiles - */ - LocalArchive.prototype.initializeFromArchiveFiles = function(archiveFiles) { - var dataFileRegex = /^wikipedia_(\d\d).dat$/; - var coordinateFileRegex = /^coordinates_(\d\d).idx$/; - this.dataFiles = new Array(); - this.coordinateFiles = new Array(); - for (var i=0; i 0) { - var intFileNr = 1 * coordinateFileNr[1]; - this.coordinateFiles[intFileNr - 1] = file; - } - else { - var dataFileNr = dataFileRegex.exec(file.name); - if (dataFileNr && dataFileNr.length > 0) { - var intFileNr = 1 * dataFileNr[1]; - this.dataFiles[intFileNr] = file; - } - } - } - } - } - - }; - - /** - * Initialize the localArchive from given directory, using DeviceStorage - * @param {type} storages List of DeviceStorages available - * @param {type} archiveDirectory - */ - LocalArchive.prototype.initializeFromDeviceStorage = function(storages, archiveDirectory) { - // First, we have to find which DeviceStorage has been selected by the user - // It is the prefix of the archive directory - var storageNameRegex = /^\/([^\/]+)\//; - var regexResults = storageNameRegex.exec(archiveDirectory); - var selectedStorage = null; - if (regexResults && regexResults.length>0) { - var selectedStorageName = regexResults[1]; - for (var i=0; i= titleCount) { - return titles; - } - return iterator.advance().then(function(title) { - if (title === null) - return titles; - titles.push(title); - return addNext(); - }); - } - return addNext(); - }).then(callbackFunction, errorHandler); - }; - - /** - * Look for a title by its name, and call the callbackFunction with this Title - * If the title is not found, the callbackFunction is called with parameter null - * @param titleName - * @param callbackFunction - */ - LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) { - var that = this; - var normalize = this.getNormalizeFunction(); - var normalizedTitleName = normalize(titleName); - - titleIterators.findPrefixOffset(this.titleFile, titleName, normalize).then(function(offset) { - var iterator = new titleIterators.SequentialTitleIterator(that, offset); - function check(title) { - if (title === null || normalize(title.name) !== normalizedTitleName) { - return null; - } else if (title.name === titleName) { - return title; - } else { - return iterator.advance().then(check); - } - } - return iterator.advance().then(check); - }).then(callbackFunction, errorHandler); - }; - - /** - * Get a random title, and call the callbackFunction with this Title - * @param callbackFunction - */ - LocalArchive.prototype.getRandomTitle = function(callbackFunction) { - var that = this; - var offset = Math.floor(Math.random() * this.titleFile.size); - jQuery.when().then(function() { - return util.readFileSlice(that.titleFile, offset, - offset + titleIterators.MAX_TITLE_LENGTH).then(function(byteArray) { - // Let's find the next newLine - var newLineIndex = 0; - while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { - newLineIndex++; - } - var iterator = new titleIterators.SequentialTitleIterator(that, offset + newLineIndex + 1); - return iterator.advance(); - }); - }).then(callbackFunction, errorHandler); - }; - - /** - * Find titles that start with the given prefix, and call the callbackFunction with this list of Titles - * @param prefix - * @param maxSize Maximum number of titles to read - * @param callbackFunction - */ - LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) { - var that = this; - var titles = []; - var normalize = this.getNormalizeFunction(); - prefix = normalize(prefix); - - titleIterators.findPrefixOffset(this.titleFile, prefix, normalize).then(function(offset) { - var iterator = new titleIterators.SequentialTitleIterator(that, offset); - function addNext() { - if (titles.length >= maxSize) { - return jQuery.Deferred().resolve(titles, maxSize); - } - return iterator.advance().then(function(title) { - if (title === null) - return jQuery.Deferred().resolve(titles, maxSize); - // check whether this title really starts with the prefix - var name = normalize(title.name); - if (name.length < prefix.length || name.substring(0, prefix.length) !== prefix) - return jQuery.Deferred().resolve(titles, maxSize); - titles.push(title); - return addNext(); - }); - } - return addNext(); - }).then(callbackFunction, errorHandler); - }; - - - /** - * Read an article from the title instance, and call the - * callbackFunction with the article HTML String - * - * @param title - * @param callbackFunction - */ - LocalArchive.prototype.readArticle = function(title, callbackFunction) { - var dataFile = null; - - var prefixedFileNumber = ""; - if (title.fileNr < 10) { - prefixedFileNumber = "0" + title.fileNr; - } else { - prefixedFileNumber = title.fileNr; - } - var expectedFileName = "wikipedia_" + prefixedFileNumber + ".dat"; - - // Find the good dump file - for (var i = 0; i < this.dataFiles.length; i++) { - var fileName = this.dataFiles[i].name; - // Check if the fileName ends with the expected file name (in case - // of DeviceStorage usage, the fileName is prefixed by the - // directory) - if (fileName.match(expectedFileName + "$") == expectedFileName) { - dataFile = this.dataFiles[i]; - } - } - if (!dataFile) { - // TODO can probably be replaced by some error handler at window level - alert("Oops : some files seem to be missing in your archive. Please report this problem to us by email (see About section), with the names of the archive and article, and the following info : " - + "File number " + title.fileNr + " not found"); - throw new Error("File number " + title.fileNr + " not found"); - } else { - var reader = new FileReader(); - // Read the article in the dataFile, starting with a chunk of CHUNK_SIZE - this.readArticleChunk(title, dataFile, reader, CHUNK_SIZE, callbackFunction); - } - - }; - - /** - * Read a chunk of the dataFile (of the given length) to try to read the - * given article. - * If the bzip2 algorithm works and articleLength of the article is reached, - * call the callbackFunction with the article HTML String. - * Else, recursively call this function with readLength + CHUNK_SIZE - * - * @param title - * @param dataFile - * @param reader - * @param readLength - * @param callbackFunction - */ - LocalArchive.prototype.readArticleChunk = function(title, dataFile, reader, - readLength, callbackFunction) { - var currentLocalArchiveInstance = this; - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Data file read cancelled'); - }; - reader.onload = function(e) { - try { - var compressedArticles = e.target.result; - webworkerBzip2.onerror = function(event){ - // TODO can probably be replaced by some error handler at window level - alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" - + event.message + " filename=" + event.filename + " line number=" + event.lineno ); - throw new Error("Error during bzip2 decompression : " + event.message + " (" + event.filename + ":" + event.lineno + ")"); - }; - webworkerBzip2.onmessage = function(event){ - switch (event.data.cmd){ - case "result": - var htmlArticles = event.data.msg; - // Start reading at offset, and keep length characters - var htmlArticle = htmlArticles.substring(title.blockOffset, - title.blockOffset + title.articleLength); - if (htmlArticle.length >= title.articleLength) { - // Keep only length characters - htmlArticle = htmlArticle.substring(0, title.articleLength); - // Decode UTF-8 encoding - htmlArticle = decodeURIComponent(escape(htmlArticle)); - callbackFunction(title, htmlArticle); - } else { - // TODO : throw exception if we reach the end of the file - currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, - callbackFunction); - } - break; - case "recurse": - currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, callbackFunction); - break; - case "debug": - console.log(event.data.msg); - break; - case "error": - // TODO can probably be replaced by some error handler at window level - alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" - + event.data.msg ); - throw new Error("Error during bzip2 decompression : " + event.data.msg); - break; - } - }; - webworkerBzip2.postMessage({cmd : 'uncompress', msg : - new Uint8Array(compressedArticles)}); - - } - catch (e) { - callbackFunction("Error : " + e); - } - }; - var blob = dataFile.slice(title.blockStart, title.blockStart - + readLength); - - // Read in the image file as a binary string. - reader.readAsArrayBuffer(blob); - }; - - /** - * Load the math image specified by the hex string and call the - * callbackFunction with a base64 encoding of its data. - * - * @param hexString - * @param callbackFunction - */ - LocalArchive.prototype.loadMathImage = function(hexString, callbackFunction) { - var entrySize = 16 + 4 + 4; - var lo = 0; - var hi = this.mathIndexFile.size / entrySize; - - var mathDataFile = this.mathDataFile; - - this.findMathDataPosition(hexString, lo, hi, function(pos, length) { - var reader = new FileReader(); - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Math image file read cancelled'); - }; - var blob = mathDataFile.slice(pos, pos + length); - reader.onload = function(e) { - var byteArray = new Uint8Array(e.target.result); - callbackFunction(util.uint8ArrayToBase64(byteArray)); - }; - reader.readAsArrayBuffer(blob); - }); - }; - - - /** - * Recursive algorithm to find the position of the Math image in the data file - * @param {type} hexString - * @param {type} lo - * @param {type} hi - * @param {type} callbackFunction - */ - LocalArchive.prototype.findMathDataPosition = function(hexString, lo, hi, callbackFunction) { - var entrySize = 16 + 4 + 4; - if (lo >= hi) { - /* TODO error - not found */ - return; - } - var reader = new FileReader(); - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Math image file read cancelled'); - }; - var mid = Math.floor((lo + hi) / 2); - var blob = this.mathIndexFile.slice(mid * entrySize, (mid + 1) * entrySize); - var currentLocalArchiveInstance = this; - reader.onload = function(e) { - var byteArray = new Uint8Array(e.target.result); - var hash = util.uint8ArrayToHex(byteArray.subarray(0, 16)); - if (hash == hexString) { - var pos = util.readIntegerFrom4Bytes(byteArray, 16); - var length = util.readIntegerFrom4Bytes(byteArray, 16 + 4); - callbackFunction(pos, length); - return; - } else if (hexString < hash) { - hi = mid; - } else { - lo = mid + 1; - } - - currentLocalArchiveInstance.findMathDataPosition(hexString, lo, hi, callbackFunction); - }; - // Read the file as a binary string - reader.readAsArrayBuffer(blob); - }; - - - /** - * Resolve the redirect of the given title instance, and call the callbackFunction with the redirected Title instance - * @param title - * @param callbackFunction - */ - LocalArchive.prototype.resolveRedirect = function(title, callbackFunction) { - var reader = new FileReader(); - reader.onerror = errorHandler; - reader.onabort = function(e) { - alert('Title file read cancelled'); - }; - reader.onload = function(e) { - var binaryTitleFile = e.target.result; - var byteArray = new Uint8Array(binaryTitleFile); - - if (byteArray.length === 0) { - // TODO can probably be replaced by some error handler at window level - alert("Oops : there seems to be something wrong in your archive. Please report it to us by email or through Github (see About section), with the names of the archive and article and the following info : " - + "Unable to find redirected article for title " + title.name + " : offset " + title.blockStart + " not found in title file"); - throw new Error("Unable to find redirected article for title " + title.name + " : offset " + title.blockStart + " not found in title file"); - } - - var redirectedTitle = title; - redirectedTitle.fileNr = 1 * byteArray[2]; - redirectedTitle.blockStart = util.readIntegerFrom4Bytes(byteArray, 3); - redirectedTitle.blockOffset = util.readIntegerFrom4Bytes(byteArray, 7); - redirectedTitle.articleLength = util.readIntegerFrom4Bytes(byteArray, 11); - - callbackFunction(redirectedTitle); - }; - // Read only the 16 necessary bytes, starting at title.blockStart - var blob = this.titleFile.slice(title.blockStart, title.blockStart + 16); - // Read in the file as a binary string - reader.readAsArrayBuffer(blob); - }; - - // This is a global counter that helps find out when the search for articles nearby is over - var callbackCounterForTitlesInCoordsSearch = 0; - - /** - * Finds titles that are located inside the given rectangle - * This is the main function, that has to be called from the application - * - * @param {type} rect Rectangle where to look for titles - * @param {type} maxTitles Maximum number of titles to find - * @param callbackFunction Function to call with the list of titles found - */ - LocalArchive.prototype.getTitlesInCoords = function(rect, maxTitles, callbackFunction) { - if (callbackCounterForTitlesInCoordsSearch > 0) { - alert("The last nearby search did not seem to end well : please try again"); - callbackCounterForTitlesInCoordsSearch = 0; - return; - } - var normalizedRectangle = rect.normalized(); - var titlePositionsFound = new Array(); - for (var i = 0; i < this.coordinateFiles.length; i++) { - callbackCounterForTitlesInCoordsSearch++; - LocalArchive.getTitlesInCoordsInt(this, i, 0, normalizedRectangle, GLOBE_RECTANGLE, maxTitles, titlePositionsFound, callbackFunction, LocalArchive.callbackGetTitlesInCoordsInt); - } - }; - - /** - * Callback function called by getTitlesInCoordsInt (or by itself), in order - * to loop through every coordinate file, and search titles nearby in each - * of them. - * When all the coordinate files are searched, or when enough titles are - * found, another function is called to convert the title positions found - * into Title instances (asynchronously) - * - * @param {type} localArchive - * @param {type} titlePositionsFound - * @param {type} maxTitles - * @param {type} callbackFunction - */ - LocalArchive.callbackGetTitlesInCoordsInt = function(localArchive, titlePositionsFound, maxTitles, callbackFunction) { - // Search is over : now let's convert the title positions into Title instances - if (titlePositionsFound && titlePositionsFound.length > 0) { - LocalArchive.readTitlesFromTitleCoordsInTitleFile(localArchive, titlePositionsFound, 0, new Array(), maxTitles, callbackFunction); - } - else { - callbackFunction(titlePositionsFound, maxTitles); - } - - }; - - /** - * This function reads a list of title positions, and converts it into a list or Title instances. - * It handles index i, then recursively calls itself for index i+1 - * When all the list is processed, the callbackFunction is called with the Title list - * - * @param {type} localArchive - * @param {type} titlePositionsFound - * @param {type} i - * @param {type} titlesFound - * @param maxTitles - * @param {type} callbackFunction - */ - LocalArchive.readTitlesFromTitleCoordsInTitleFile = function (localArchive, titlePositionsFound, i, titlesFound, maxTitles, callbackFunction) { - var titleOffset = titlePositionsFound[i]; - localArchive.getTitlesStartingAtOffset(titleOffset, 1, function(titleList) { - if (titleList && titleList.length === 1) { - titlesFound.push(titleList[0]); - i++; - if (i= 0 && titlePositionsFound.length < maxTitles) { - titlePositionsFound.push(title_pos); - console.log("maxTitles="+maxTitles+" titlePositionsFound.length="+titlePositionsFound.length); - } - } - } - if (callbackCounterForTitlesInCoordsSearch === 0) { - console.log("callbackCounterForTitlesInCoordsSearch reached 0 : return the titles found"); - callbackGetTitlesInCoordsInt(localArchive, titlePositionsFound, maxTitles, callbackFunction); - } - - }; - // Read 22 bytes in the coordinate files, at coordFilePos index, in order to read the selector and the coordinates - // 2 + 4 + 4 + 3 * 4 = 22 - // As there can be up to 65535 different coordinates, we have to read 22*65535 bytes = 1.44MB - // TODO : This should be improved by reading the file in 2 steps : - // - first read the selector - // - then read the coordinates (reading only the exact necessary bytes) - var blob = localArchive.coordinateFiles[coordinateFileIndex].slice(coordFilePos, coordFilePos + 22*65535); - - // Read in the file as a binary string - reader.readAsArrayBuffer(blob); - }; - - /** - *  Scans the DeviceStorage for archives - * - * @param storages List of DeviceStorage instances - * @param callbackFunction Function to call with the list of directories where archives are found - */ - LocalArchive.scanForArchives = function(storages, callbackFunction) { - var directories = []; - var promises = jQuery.map(storages, function(storage) { - return storage.scanForDirectoriesContainingFile('titles.idx') - .then(function(dirs) { - jQuery.merge(directories, dirs); - return true; - }); - }); - jQuery.when.apply(null, promises).then(function() { - callbackFunction(directories); - }, function(error) { - alert("Error scanning your SD card : " + error - + ". If you're using the Firefox OS Simulator, please put the archives in " - + "a 'fake-sdcard' directory inside your Firefox profile " - + "(ex : ~/.mozilla/firefox/xxxx.default/extensions/fxos_1_x_simulator@mozilla.org/" - + "profile/fake-sdcard/wikipedia_small_2010-08-14)"); - callbackFunction(null); - }); - }; - - /** - * Normalize the given String, if the current Archive is compatible. - * If it's not, return the given String, as is. - * @param string : string to normalized - * @returns normalized string, or same string if archive is not compatible - */ - LocalArchive.prototype.normalizeStringIfCompatibleArchive = function(string) { - if (this.normalizedTitles === true) { - return normalize_string.normalizeString(string); - } - else { - return string; - } - }; - - /** - * Returns a function that normalizes strings if the current archive is compatible. - * If it is not, returns the identity function. - */ - LocalArchive.prototype.getNormalizeFunction = function() { - if (this.normalizedTitles === true) { - return normalize_string.normalizeString; - } else { - return function(string) { return string; }; - } - }; - - /** - * ErrorHandler for FileReader - * @param {type} evt - * @returns {undefined} - */ - function errorHandler(evt) { - switch (evt.target.error.code) { - case evt.target.error.NOT_FOUND_ERR: - alert('File Not Found!'); - break; - case evt.target.error.NOT_READABLE_ERR: - alert('File is not readable'); - break; - case evt.target.error.ABORT_ERR: - break; // noop - default: - alert('An error occurred reading this file.'); - }; - } - - - /** - * Functions and classes exposed by this module - */ - return { - LocalArchive: LocalArchive - }; -}); +/** + * archive.js : Class for a local Evopedia archive, with the algorithms to read it + * This file handles finding a title in an archive, reading an article in an archive etc + * + * Copyright 2013-2014 Mossroy and contributors + * License GPL v3: + * + * This file is part of Evopedia. + * + * Evopedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Evopedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Evopedia (file LICENSE-GPLv3.txt). If not, see + */ +define(function(require) { + + // Module dependencies + var normalize_string = require('normalize_string'); + var util = require('util'); + var geometry = require('geometry'); + var jQuery = require('jquery'); + var titleIterators = require('titleIterators'); + + // Declare the webworker that can uncompress with bzip2 algorithm + var webworkerBzip2; + try { + // When using the application normally + webworkerBzip2 = new Worker("js/lib/webworker_bzip2.js"); + } + catch(e) { + // When using unit tests + webworkerBzip2 = new Worker("www/js/lib/webworker_bzip2.js"); + } + + // Size of chunks read in the dump files : 128 KB + var CHUNK_SIZE = 131072; + // A rectangle representing all the earth globe + var GLOBE_RECTANGLE = new geometry.rect(-91, -181, 182, 362); + + /** + * LocalArchive class : defines a wikipedia dump on the filesystem + */ + function LocalArchive() { + this._dataFiles = new Array(); + this._coordinateFiles = new Array(); + this._titleFile = null; + this._mathIndexFile = null; + this._mathDataFile = null; + this._date = null; + this._language = null; + this._titleSearchFile = null; + this._normalizedTitles = true; + }; + + + /** + * Read the title Files in the given directory, and assign them to the + * current LocalArchive + * + * @param storage + * @param directory + */ + LocalArchive.prototype.readTitleFilesFromStorage = function(storage, directory) { + var currentLocalArchiveInstance = this; + storage.get(directory + 'titles.idx').then(function(file) { + currentLocalArchiveInstance._titleFile = file; + }, function(error) { + alert("Error reading title file in directory " + directory + " : " + error); + }); + storage.get(directory + 'titles_search.idx').then(function(file) { + currentLocalArchiveInstance._titleSearchFile = file; + }, function(error) { + // Do nothing : this file is not mandatory in an archive + }); + }; + + /** + * Read the data Files in the given directory (starting at given index), and + * assign them to the current LocalArchive + * + * @param storage + * @param directory + * @param index + */ + LocalArchive.prototype.readDataFilesFromStorage = function(storage, directory, index) { + var currentLocalArchiveInstance = this; + + var prefixedFileNumber = ""; + if (index < 10) { + prefixedFileNumber = "0" + index; + } else { + prefixedFileNumber = index; + } + storage.get(directory + 'wikipedia_' + prefixedFileNumber + '.dat') + .then(function(file) { + currentLocalArchiveInstance.dataFiles[index] = file; + currentLocalArchiveInstance.readDataFilesFromStorage(storage, directory, + index + 1); + }, function(error) { + // TODO there must be a better way to detect a FileNotFound + if (error != "NotFoundError") { + alert("Error reading data file " + index + " in directory " + + directory + " : " + error); + } + }); + }; + + /** + * Read the coordinate Files in the given directory (starting at given index), and + * assign them to the current LocalArchive + * + * @param storage + * @param directory + * @param index + */ + LocalArchive.prototype.readCoordinateFilesFromStorage = function(storage, directory, index) { + var currentLocalArchiveInstance = this; + + var prefixedFileNumber = ""; + if (index < 10) { + prefixedFileNumber = "0" + index; + } else { + prefixedFileNumber = index; + } + storage.get(directory + 'coordinates_' + prefixedFileNumber + + '.idx').then(function(file) { + currentLocalArchiveInstance._coordinateFiles[index - 1] = file; + currentLocalArchiveInstance.readCoordinateFilesFromStorage(storage, directory, + index + 1); + }, function(error) { + // TODO there must be a better way to detect a FileNotFound + if (error != "NotFoundError") { + alert("Error reading coordinates file " + index + " in directory " + + directory + " : " + error); + } + }); + }; + + /** + * Read the metadata.txt file in the given directory, and store its content + * in the current instance + * + * @param storage + * @param directory + */ + LocalArchive.prototype.readMetadataFileFromStorage = function(storage, directory) { + var currentLocalArchiveInstance = this; + + storage.get(directory + 'metadata.txt').then(function(file) { + var metadataFile = file; + currentLocalArchiveInstance.readMetadataFile(metadataFile); + }, function(error) { + alert("Error reading metadata.txt file in directory " + + directory + " : " + error); + }); + }; + + /** + * Read the metadata file, in order to populate its values in the current + * instance + * @param {File} file metadata.txt file + */ + LocalArchive.prototype.readMetadataFile = function(file) { + var currentLocalArchiveInstance = this; + var reader = new FileReader(); + reader.onload = function(e) { + var metadata = e.target.result; + currentLocalArchiveInstance.language = /\nlanguage ?\= ?([^ \n]+)/.exec(metadata)[1]; + currentLocalArchiveInstance.date = /\ndate ?\= ?([^ \n]+)/.exec(metadata)[1]; + var normalizedTitlesRegex = /\nnormalized_titles ?\= ?([^ \n]+)/; + if (normalizedTitlesRegex.exec(metadata)) { + var normalizedTitlesInt = normalizedTitlesRegex.exec(metadata)[1]; + if (normalizedTitlesInt === "0") { + currentLocalArchiveInstance._normalizedTitles = false; + } + else { + currentLocalArchiveInstance._normalizedTitles = true; + } + } + else { + currentLocalArchiveInstance._normalizedTitles = true; + } + }; + reader.readAsText(file); + }; + + /** + * Initialize the localArchive from given archive files + * @param {type} archiveFiles + */ + LocalArchive.prototype.initializeFromArchiveFiles = function(archiveFiles) { + var dataFileRegex = /^wikipedia_(\d\d).dat$/; + var coordinateFileRegex = /^coordinates_(\d\d).idx$/; + this._dataFiles = new Array(); + this._coordinateFiles = new Array(); + for (var i=0; i 0) { + var intFileNr = 1 * coordinateFileNr[1]; + this._coordinateFiles[intFileNr - 1] = file; + } + else { + var dataFileNr = dataFileRegex.exec(file.name); + if (dataFileNr && dataFileNr.length > 0) { + var intFileNr = 1 * dataFileNr[1]; + this._dataFiles[intFileNr] = file; + } + } + } + } + } + + }; + + /** + * Initialize the localArchive from given directory, using DeviceStorage + * @param {type} storages List of DeviceStorages available + * @param {type} archiveDirectory + */ + LocalArchive.prototype.initializeFromDeviceStorage = function(storages, archiveDirectory) { + // First, we have to find which DeviceStorage has been selected by the user + // It is the prefix of the archive directory + var storageNameRegex = /^\/([^\/]+)\//; + var regexResults = storageNameRegex.exec(archiveDirectory); + var selectedStorage = null; + if (regexResults && regexResults.length>0) { + var selectedStorageName = regexResults[1]; + for (var i=0; i= titleCount) { + return titles; + } + return iterator.advance().then(function(title) { + if (title === null) + return titles; + titles.push(title); + return addNext(); + }); + } + return addNext(); + }).then(callbackFunction, errorHandler); + }; + + /** + * Look for a title by its name, and call the callbackFunction with this Title + * If the title is not found, the callbackFunction is called with parameter null + * @param titleName + * @param callbackFunction + */ + LocalArchive.prototype.getTitleByName = function(titleName, callbackFunction) { + var that = this; + var normalize = this.getNormalizeFunction(); + var normalizedTitleName = normalize(titleName); + + titleIterators.findPrefixOffset(this._titleFile, titleName, normalize).then(function(offset) { + var iterator = new titleIterators.SequentialTitleIterator(that, offset); + function check(title) { + if (title === null || normalize(title._name) !== normalizedTitleName) { + return null; + } else if (title._name === titleName) { + return title; + } else { + return iterator.advance().then(check); + } + } + return iterator.advance().then(check); + }).then(callbackFunction, errorHandler); + }; + + /** + * Get a random title, and call the callbackFunction with this Title + * @param callbackFunction + */ + LocalArchive.prototype.getRandomTitle = function(callbackFunction) { + var that = this; + var offset = Math.floor(Math.random() * this._titleFile.size); + jQuery.when().then(function() { + return util.readFileSlice(that._titleFile, offset, + offset + titleIterators.MAX_TITLE_LENGTH).then(function(byteArray) { + // Let's find the next newLine + var newLineIndex = 0; + while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { + newLineIndex++; + } + var iterator = new titleIterators.SequentialTitleIterator(that, offset + newLineIndex + 1); + return iterator.advance(); + }); + }).then(callbackFunction, errorHandler); + }; + + /** + * Find titles that start with the given prefix, and call the callbackFunction with this list of Titles + * @param prefix + * @param maxSize Maximum number of titles to read + * @param callbackFunction + */ + LocalArchive.prototype.findTitlesWithPrefix = function(prefix, maxSize, callbackFunction) { + var that = this; + var titles = []; + var normalize = this.getNormalizeFunction(); + prefix = normalize(prefix); + + titleIterators.findPrefixOffset(this._titleFile, prefix, normalize).then(function(offset) { + var iterator = new titleIterators.SequentialTitleIterator(that, offset); + function addNext() { + if (titles.length >= maxSize) { + return jQuery.Deferred().resolve(titles, maxSize); + } + return iterator.advance().then(function(title) { + if (title === null) + return jQuery.Deferred().resolve(titles, maxSize); + // check whether this title really starts with the prefix + var name = normalize(title._name); + if (name.length < prefix.length || name.substring(0, prefix.length) !== prefix) + return jQuery.Deferred().resolve(titles, maxSize); + titles.push(title); + return addNext(); + }); + } + return addNext(); + }).then(callbackFunction, errorHandler); + }; + + + /** + * Read an article from the title instance, and call the + * callbackFunction with the article HTML String + * + * @param title + * @param callbackFunction + */ + LocalArchive.prototype.readArticle = function(title, callbackFunction) { + var dataFile = null; + + var prefixedFileNumber = ""; + if (title._fileNr < 10) { + prefixedFileNumber = "0" + title._fileNr; + } else { + prefixedFileNumber = title._fileNr; + } + var expectedFileName = "wikipedia_" + prefixedFileNumber + ".dat"; + + // Find the good dump file + for (var i = 0; i < this._dataFiles.length; i++) { + var fileName = this._dataFiles[i].name; + // Check if the fileName ends with the expected file name (in case + // of DeviceStorage usage, the fileName is prefixed by the + // directory) + if (fileName.match(expectedFileName + "$") == expectedFileName) { + dataFile = this._dataFiles[i]; + } + } + if (!dataFile) { + // TODO can probably be replaced by some error handler at window level + alert("Oops : some files seem to be missing in your archive. Please report this problem to us by email (see About section), with the names of the archive and article, and the following info : " + + "File number " + title._fileNr + " not found"); + throw new Error("File number " + title._fileNr + " not found"); + } else { + var reader = new FileReader(); + // Read the article in the dataFile, starting with a chunk of CHUNK_SIZE + this.readArticleChunk(title, dataFile, reader, CHUNK_SIZE, callbackFunction); + } + + }; + + /** + * Read a chunk of the dataFile (of the given length) to try to read the + * given article. + * If the bzip2 algorithm works and articleLength of the article is reached, + * call the callbackFunction with the article HTML String. + * Else, recursively call this function with readLength + CHUNK_SIZE + * + * @param title + * @param dataFile + * @param reader + * @param readLength + * @param callbackFunction + */ + LocalArchive.prototype.readArticleChunk = function(title, dataFile, reader, + readLength, callbackFunction) { + var currentLocalArchiveInstance = this; + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Data file read cancelled'); + }; + reader.onload = function(e) { + try { + var compressedArticles = e.target.result; + webworkerBzip2.onerror = function(event){ + // TODO can probably be replaced by some error handler at window level + alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" + + event.message + " filename=" + event.filename + " line number=" + event.lineno ); + throw new Error("Error during bzip2 decompression : " + event.message + " (" + event.filename + ":" + event.lineno + ")"); + }; + webworkerBzip2.onmessage = function(event){ + switch (event.data.cmd){ + case "result": + var htmlArticles = event.data.msg; + // Start reading at offset, and keep length characters + var htmlArticle = htmlArticles.substring(title._blockOffset, + title._blockOffset + title._articleLength); + if (htmlArticle.length >= title._articleLength) { + // Keep only length characters + htmlArticle = htmlArticle.substring(0, title._articleLength); + // Decode UTF-8 encoding + htmlArticle = decodeURIComponent(escape(htmlArticle)); + callbackFunction(title, htmlArticle); + } else { + // TODO : throw exception if we reach the end of the file + currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, + callbackFunction); + } + break; + case "recurse": + currentLocalArchiveInstance.readArticleChunk(title, dataFile, reader, readLength + CHUNK_SIZE, callbackFunction); + break; + case "debug": + console.log(event.data.msg); + break; + case "error": + // TODO can probably be replaced by some error handler at window level + alert("An unexpected error occured during bzip2 decompression. Please report it to us by email or through Github (see About section), with the names of the archive and article, and the following info : message=" + + event.data.msg ); + throw new Error("Error during bzip2 decompression : " + event.data.msg); + break; + } + }; + webworkerBzip2.postMessage({cmd : 'uncompress', msg : + new Uint8Array(compressedArticles)}); + + } + catch (e) { + callbackFunction("Error : " + e); + } + }; + var blob = dataFile.slice(title._blockStart, title._blockStart + + readLength); + + // Read in the image file as a binary string. + reader.readAsArrayBuffer(blob); + }; + + /** + * Load the math image specified by the hex string and call the + * callbackFunction with a base64 encoding of its data. + * + * @param hexString + * @param callbackFunction + */ + LocalArchive.prototype.loadMathImage = function(hexString, callbackFunction) { + var entrySize = 16 + 4 + 4; + var lo = 0; + var hi = this._mathIndexFile.size / entrySize; + + var mathDataFile = this._mathDataFile; + + this.findMathDataPosition(hexString, lo, hi, function(pos, length) { + var reader = new FileReader(); + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Math image file read cancelled'); + }; + var blob = mathDataFile.slice(pos, pos + length); + reader.onload = function(e) { + var byteArray = new Uint8Array(e.target.result); + callbackFunction(util.uint8ArrayToBase64(byteArray)); + }; + reader.readAsArrayBuffer(blob); + }); + }; + + + /** + * Recursive algorithm to find the position of the Math image in the data file + * @param {type} hexString + * @param {type} lo + * @param {type} hi + * @param {type} callbackFunction + */ + LocalArchive.prototype.findMathDataPosition = function(hexString, lo, hi, callbackFunction) { + var entrySize = 16 + 4 + 4; + if (lo >= hi) { + /* TODO error - not found */ + return; + } + var reader = new FileReader(); + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Math image file read cancelled'); + }; + var mid = Math.floor((lo + hi) / 2); + var blob = this._mathIndexFile.slice(mid * entrySize, (mid + 1) * entrySize); + var currentLocalArchiveInstance = this; + reader.onload = function(e) { + var byteArray = new Uint8Array(e.target.result); + var hash = util.uint8ArrayToHex(byteArray.subarray(0, 16)); + if (hash == hexString) { + var pos = util.readIntegerFrom4Bytes(byteArray, 16); + var length = util.readIntegerFrom4Bytes(byteArray, 16 + 4); + callbackFunction(pos, length); + return; + } else if (hexString < hash) { + hi = mid; + } else { + lo = mid + 1; + } + + currentLocalArchiveInstance.findMathDataPosition(hexString, lo, hi, callbackFunction); + }; + // Read the file as a binary string + reader.readAsArrayBuffer(blob); + }; + + + /** + * Resolve the redirect of the given title instance, and call the callbackFunction with the redirected Title instance + * @param title + * @param callbackFunction + */ + LocalArchive.prototype.resolveRedirect = function(title, callbackFunction) { + var reader = new FileReader(); + reader.onerror = errorHandler; + reader.onabort = function(e) { + alert('Title file read cancelled'); + }; + reader.onload = function(e) { + var binaryTitleFile = e.target.result; + var byteArray = new Uint8Array(binaryTitleFile); + + if (byteArray.length === 0) { + // TODO can probably be replaced by some error handler at window level + alert("Oops : there seems to be something wrong in your archive. Please report it to us by email or through Github (see About section), with the names of the archive and article and the following info : " + + "Unable to find redirected article for title " + title._name + " : offset " + title._blockStart + " not found in title file"); + throw new Error("Unable to find redirected article for title " + title._name + " : offset " + title._blockStart + " not found in title file"); + } + + var redirectedTitle = title; + redirectedTitle._fileNr = 1 * byteArray[2]; + redirectedTitle._blockStart = util.readIntegerFrom4Bytes(byteArray, 3); + redirectedTitle._blockOffset = util.readIntegerFrom4Bytes(byteArray, 7); + redirectedTitle._articleLength = util.readIntegerFrom4Bytes(byteArray, 11); + + callbackFunction(redirectedTitle); + }; + // Read only the 16 necessary bytes, starting at title.blockStart + var blob = this._titleFile.slice(title._blockStart, title._blockStart + 16); + // Read in the file as a binary string + reader.readAsArrayBuffer(blob); + }; + + // This is a global counter that helps find out when the search for articles nearby is over + var callbackCounterForTitlesInCoordsSearch = 0; + + /** + * Finds titles that are located inside the given rectangle + * This is the main function, that has to be called from the application + * + * @param {type} rect Rectangle where to look for titles + * @param {type} maxTitles Maximum number of titles to find + * @param callbackFunction Function to call with the list of titles found + */ + LocalArchive.prototype.getTitlesInCoords = function(rect, maxTitles, callbackFunction) { + if (callbackCounterForTitlesInCoordsSearch > 0) { + alert("The last nearby search did not seem to end well : please try again"); + callbackCounterForTitlesInCoordsSearch = 0; + return; + } + var normalizedRectangle = rect.normalized(); + var titlePositionsFound = new Array(); + for (var i = 0; i < this._coordinateFiles.length; i++) { + callbackCounterForTitlesInCoordsSearch++; + LocalArchive.getTitlesInCoordsInt(this, i, 0, normalizedRectangle, GLOBE_RECTANGLE, maxTitles, titlePositionsFound, callbackFunction, LocalArchive.callbackGetTitlesInCoordsInt); + } + }; + + /** + * Callback function called by getTitlesInCoordsInt (or by itself), in order + * to loop through every coordinate file, and search titles nearby in each + * of them. + * When all the coordinate files are searched, or when enough titles are + * found, another function is called to convert the title positions found + * into Title instances (asynchronously) + * + * @param {type} localArchive + * @param {type} titlePositionsFound + * @param {type} maxTitles + * @param {type} callbackFunction + */ + LocalArchive.callbackGetTitlesInCoordsInt = function(localArchive, titlePositionsFound, maxTitles, callbackFunction) { + // Search is over : now let's convert the title positions into Title instances + if (titlePositionsFound && titlePositionsFound.length > 0) { + LocalArchive.readTitlesFromTitleCoordsInTitleFile(localArchive, titlePositionsFound, 0, new Array(), maxTitles, callbackFunction); + } + else { + callbackFunction(titlePositionsFound, maxTitles); + } + + }; + + /** + * This function reads a list of title positions, and converts it into a list or Title instances. + * It handles index i, then recursively calls itself for index i+1 + * When all the list is processed, the callbackFunction is called with the Title list + * + * @param {type} localArchive + * @param {type} titlePositionsFound + * @param {type} i + * @param {type} titlesFound + * @param maxTitles + * @param {type} callbackFunction + */ + LocalArchive.readTitlesFromTitleCoordsInTitleFile = function (localArchive, titlePositionsFound, i, titlesFound, maxTitles, callbackFunction) { + var titleOffset = titlePositionsFound[i]; + localArchive.getTitlesStartingAtOffset(titleOffset, 1, function(titleList) { + if (titleList && titleList.length === 1) { + titlesFound.push(titleList[0]); + i++; + if (i= 0 && titlePositionsFound.length < maxTitles) { + titlePositionsFound.push(title_pos); + console.log("maxTitles="+maxTitles+" titlePositionsFound.length="+titlePositionsFound.length); + } + } + } + if (callbackCounterForTitlesInCoordsSearch === 0) { + console.log("callbackCounterForTitlesInCoordsSearch reached 0 : return the titles found"); + callbackGetTitlesInCoordsInt(localArchive, titlePositionsFound, maxTitles, callbackFunction); + } + + }; + // Read 22 bytes in the coordinate files, at coordFilePos index, in order to read the selector and the coordinates + // 2 + 4 + 4 + 3 * 4 = 22 + // As there can be up to 65535 different coordinates, we have to read 22*65535 bytes = 1.44MB + // TODO : This should be improved by reading the file in 2 steps : + // - first read the selector + // - then read the coordinates (reading only the exact necessary bytes) + var blob = localArchive._coordinateFiles[coordinateFileIndex].slice(coordFilePos, coordFilePos + 22*65535); + + // Read in the file as a binary string + reader.readAsArrayBuffer(blob); + }; + + /** + *  Scans the DeviceStorage for archives + * + * @param storages List of DeviceStorage instances + * @param callbackFunction Function to call with the list of directories where archives are found + */ + LocalArchive.scanForArchives = function(storages, callbackFunction) { + var directories = []; + var promises = jQuery.map(storages, function(storage) { + return storage.scanForDirectoriesContainingFile('titles.idx') + .then(function(dirs) { + jQuery.merge(directories, dirs); + return true; + }); + }); + jQuery.when.apply(null, promises).then(function() { + callbackFunction(directories); + }, function(error) { + alert("Error scanning your SD card : " + error + + ". If you're using the Firefox OS Simulator, please put the archives in " + + "a 'fake-sdcard' directory inside your Firefox profile " + + "(ex : ~/.mozilla/firefox/xxxx.default/extensions/fxos_1_x_simulator@mozilla.org/" + + "profile/fake-sdcard/wikipedia_small_2010-08-14)"); + callbackFunction(null); + }); + }; + + /** + * Normalize the given String, if the current Archive is compatible. + * If it's not, return the given String, as is. + * @param string : string to normalized + * @returns normalized string, or same string if archive is not compatible + */ + LocalArchive.prototype.normalizeStringIfCompatibleArchive = function(string) { + if (this._normalizedTitles === true) { + return normalize_string.normalizeString(string); + } + else { + return string; + } + }; + + /** + * Returns a function that normalizes strings if the current archive is compatible. + * If it is not, returns the identity function. + */ + LocalArchive.prototype.getNormalizeFunction = function() { + if (this._normalizedTitles === true) { + return normalize_string.normalizeString; + } else { + return function(string) { return string; }; + } + }; + + /** + * ErrorHandler for FileReader + * @param {type} evt + * @returns {undefined} + */ + function errorHandler(evt) { + switch (evt.target.error.code) { + case evt.target.error.NOT_FOUND_ERR: + alert('File Not Found!'); + break; + case evt.target.error.NOT_READABLE_ERR: + alert('File is not readable'); + break; + case evt.target.error.ABORT_ERR: + break; // noop + default: + alert('An error occurred reading this file.'); + }; + } + + + /** + * Functions and classes exposed by this module + */ + return { + LocalArchive: LocalArchive + }; +}); diff --git a/www/js/lib/title.js b/www/js/lib/title.js index a54818ce..e73c90bc 100644 --- a/www/js/lib/title.js +++ b/www/js/lib/title.js @@ -1,150 +1,149 @@ -/** - * title.js : Class for the title of an article - * - * Copyright 2013-2014 Mossroy and contributors - * License GPL v3: - * - * This file is part of Evopedia. - * - * Evopedia is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Evopedia is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Evopedia (file LICENSE-GPLv3.txt). If not, see - */ -define(function(require) { - - // Module dependencies - var utf8 = require('utf8'); - var util = require('util'); - - /** - * Title class : defines the title of an article and some methods to manipulate it - */ - function Title() { - this.name = null; - this.fileNr = null; - this.blockStart = null; - this.blockOffset = null; - this.articleLength = null; - this.archive = null; - this.titleOffset = null; - this.titleEntryLength = null; - } - ; - - Title.prototype.getReadableName = function() { - return this.name.replace(/_/g, " "); - }; - - - /** - * Creates a Title instance from an encoded title line from a title file - * @param {type} encodedTitle - * @param {type} archive - * @param {type} titleOffset - * @returns {_L1.Title} - */ - Title.parseTitle = function(encodedTitle, archive, titleOffset) { - if (archive === null) { - throw new Error("Error while parsing an encoded title line un title File : archive cannot be null"); - } - if (titleOffset < 0) { - throw new Error("Error while parsing an encoded title line un title File : titleOffset cannot be negative (was " + titleOffset + ")"); - } - var t = new Title(); - t.archive = archive; - t.titleOffset = titleOffset; - - if (encodedTitle === null || encodedTitle.length < 15) - return null; - - if (encodedTitle[encodedTitle.length - 1] == '\n') { - t.titleEntryLength = encodedTitle.length; - } else { - t.titleEntryLength = encodedTitle.length + 1; - } - - var escapedEncodedTitle = new Uint8Array(encodedTitle); - var escapes = util.readIntegerFrom2Bytes(encodedTitle, 0); - if ((escapes & (1 << 14)) != 0) - escapes |= 10; - for (var i = 0; i < 13; i++) { - if ((escapes & (1 << i)) != 0) - escapedEncodedTitle[i + 2] = 10; // Corresponds to \n - } - - t.fileNr = 1 * escapedEncodedTitle[2]; - t.blockStart = util.readIntegerFrom4Bytes(escapedEncodedTitle, 3); - t.blockOffset = util.readIntegerFrom4Bytes(escapedEncodedTitle, 7); - t.articleLength = util.readIntegerFrom4Bytes(escapedEncodedTitle, 11); - - t.name = Title.parseNameOnly(escapedEncodedTitle); - - return t; - }; - - /* - * Retrieves the name of an article from an encoded title line - */ - Title.parseNameOnly = function(encodedTitle) { - var len = encodedTitle.length; - if (len < 15) { - return null; - } - if (len > 15 && encodedTitle[len - 1] == '\n') { - len--; - } - return utf8.parse(encodedTitle.subarray(15, len)); - }; - - /** - * Creates a title instance from a serialized id - * @param {type} localArchive - * @param {type} titleId - * @returns {_L1.Title} - */ - Title.parseTitleId = function(localArchive, titleId) { - var title = new Title(); - var idParts = titleId.split("|"); - title.archive = localArchive; - title.fileNr = parseInt(idParts[2], 10); - title.titleOffset = parseInt(idParts[3], 10); - title.name = idParts[4]; - title.blockStart = parseInt(idParts[5], 10); - title.blockOffset = parseInt(idParts[6], 10); - title.articleLength = parseInt(idParts[7], 10); - return title; - }; - - - /** - * Serialize the title with its values - * @returns {String} - */ - Title.prototype.toStringId = function() { - return this.archive.language + "|" + this.archive.date + "|" + this.fileNr + "|" - + this.titleOffset + "|" + this.name + "|" + this.blockStart + "|" + this.blockOffset + "|" + this.articleLength; - }; - - /** - * Serialize the title in a readable way - */ - Title.prototype.toString = function() { - return "title.id = " + this.toStringId() + "title.name = " + this.name + " title.fileNr = " + this.fileNr + " title.blockStart = " + this.blockStart + " title.blockOffset = " + this.blockOffset + " title.articleLength = " + this.articleLength; - }; - - /** - * Functions and classes exposed by this module - */ - return { - Title: Title - }; -}); +/** + * title.js : Class for the title of an article + * + * Copyright 2013-2014 Mossroy and contributors + * License GPL v3: + * + * This file is part of Evopedia. + * + * Evopedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Evopedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Evopedia (file LICENSE-GPLv3.txt). If not, see + */ +define(function(require) { + + // Module dependencies + var utf8 = require('utf8'); + var util = require('util'); + + /** + * Title class : defines the title of an article and some methods to manipulate it + */ + function Title() { + this._name = null; + this._fileNr = null; + this._blockStart = null; + this._blockOffset = null; + this._articleLength = null; + this._archive = null; + this._titleOffset = null; + this._titleEntryLength = null; + }; + + Title.prototype.getReadableName = function() { + return this._name.replace(/_/g, " "); + }; + + + /** + * Creates a Title instance from an encoded title line from a title file + * @param {type} encodedTitle + * @param {type} archive + * @param {type} titleOffset + * @returns {_L1.Title} + */ + Title.parseTitle = function(encodedTitle, archive, titleOffset) { + if (archive === null) { + throw new Error("Error while parsing an encoded title line un title File : archive cannot be null"); + } + if (titleOffset < 0) { + throw new Error("Error while parsing an encoded title line un title File : titleOffset cannot be negative (was " + titleOffset + ")"); + } + var t = new Title(); + t._archive = archive; + t._titleOffset = titleOffset; + + if (encodedTitle === null || encodedTitle.length < 15) + return null; + + if (encodedTitle[encodedTitle.length - 1] == '\n') { + t._titleEntryLength = encodedTitle.length; + } else { + t._titleEntryLength = encodedTitle.length + 1; + } + + var escapedEncodedTitle = new Uint8Array(encodedTitle); + var escapes = util.readIntegerFrom2Bytes(encodedTitle, 0); + if ((escapes & (1 << 14)) != 0) + escapes |= 10; + for (var i = 0; i < 13; i++) { + if ((escapes & (1 << i)) != 0) + escapedEncodedTitle[i + 2] = 10; // Corresponds to \n + } + + t._fileNr = 1 * escapedEncodedTitle[2]; + t._blockStart = util.readIntegerFrom4Bytes(escapedEncodedTitle, 3); + t._blockOffset = util.readIntegerFrom4Bytes(escapedEncodedTitle, 7); + t._articleLength = util.readIntegerFrom4Bytes(escapedEncodedTitle, 11); + + t._name = Title.parseNameOnly(escapedEncodedTitle); + + return t; + }; + + /* + * Retrieves the name of an article from an encoded title line + */ + Title.parseNameOnly = function(encodedTitle) { + var len = encodedTitle.length; + if (len < 15) { + return null; + } + if (len > 15 && encodedTitle[len - 1] == '\n') { + len--; + } + return utf8.parse(encodedTitle.subarray(15, len)); + }; + + /** + * Creates a title instance from a serialized id + * @param {type} localArchive + * @param {type} titleId + * @returns {_L1.Title} + */ + Title.parseTitleId = function(localArchive, titleId) { + var title = new Title(); + var idParts = titleId.split("|"); + title._archive = localArchive; + title._fileNr = parseInt(idParts[2], 10); + title._titleOffset = parseInt(idParts[3], 10); + title._name = idParts[4]; + title._blockStart = parseInt(idParts[5], 10); + title._blockOffset = parseInt(idParts[6], 10); + title._articleLength = parseInt(idParts[7], 10); + return title; + }; + + + /** + * Serialize the title with its values + * @returns {String} + */ + Title.prototype.toStringId = function() { + return this._archive.language + "|" + this._archive.date + "|" + this._fileNr + "|" + + this._titleOffset + "|" + this._name + "|" + this._blockStart + "|" + this._blockOffset + "|" + this._articleLength; + }; + + /** + * Serialize the title in a readable way + */ + Title.prototype.toString = function() { + return "title.id = " + this.toStringId() + "title.name = " + this._name + " title.fileNr = " + this._fileNr + " title.blockStart = " + this._blockStart + " title.blockOffset = " + this._blockOffset + " title.articleLength = " + this._articleLength; + }; + + /** + * Functions and classes exposed by this module + */ + return { + Title: Title + }; +}); diff --git a/www/js/lib/titleIterators.js b/www/js/lib/titleIterators.js index 01751db0..064a3fbd 100644 --- a/www/js/lib/titleIterators.js +++ b/www/js/lib/titleIterators.js @@ -1,127 +1,127 @@ -/** - * titleIterators.js : Various classes to iterate over titles, for example as a - * result of searching. - * - * Copyright 2014 Evopedia developers - * License GPL v3: - * - * This file is part of Evopedia. - * - * Evopedia is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Evopedia is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Evopedia (file LICENSE-GPLv3.txt). If not, see - */ -define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) { - // Maximum length of a title - // 300 bytes is arbitrary : we actually do not really know how long the titles will be - // But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough - var MAX_TITLE_LENGTH = 300; - - /** - * Iterates over all titles starting at the given offset. - * The asynchronous method advance has to be called before this.title is - * valid. - * @param archive - * @param offset - */ - function SequentialTitleIterator(archive, offset) { - this._titleFile = archive.titleFile; - this._archive = archive; - this._offset = offset; - this.title = null; - }; - /** - * Advances to the next title (or the first), if possible. - * @returns jQuery promise containing the next title or null if there is no - * next title - */ - SequentialTitleIterator.prototype.advance = function() { - if (this._offset >= this._titleFile.size) { - this.title = null; - return jQuery.when(this.title); - } - var that = this; - return util.readFileSlice(this._titleFile, this._offset, - this._offset + MAX_TITLE_LENGTH).then(function(byteArray) { - var newLineIndex = 15; - while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { - newLineIndex++; - } - var encodedTitle = byteArray.subarray(0, newLineIndex); - that._title = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset); - that._offset += newLineIndex + 1; - return that._title; - }); - }; - - /** - * Searches for the offset into the given title file where the first title - * with the given prefix (or lexicographically larger) is located. - * The given function normalize is applied to every title before comparison. - * @param titleFile - * @param prefix - * @param normalize function to be applied to every title before comparison - * @returns jQuery promise giving the offset - */ - function findPrefixOffset(titleFile, prefix, normalize) { - prefix = normalize(prefix); - var lo = 0; - var hi = titleFile.size; - var iterate = function() { - if (lo >= hi - 1) { - if (lo > 0) - lo += 2; // Let lo point to the start of an entry - return jQuery.when(lo); - } else { - var mid = Math.floor((lo + hi) / 2); - return util.readFileSlice(titleFile, mid, mid + MAX_TITLE_LENGTH).then(function(byteArray) { - // Look for the index of the next NewLine - var newLineIndex = 0; - while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { - newLineIndex++; - } - var startIndex = 0; - if (mid > 0) { - startIndex = newLineIndex + 16; - newLineIndex = startIndex; - // Look for the index of the next NewLine - while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { - newLineIndex++; - } - } - if (newLineIndex === startIndex) { - // End of file reached - hi = mid; - } else { - var normalizedTitle = normalize(utf8.parse(byteArray.subarray(startIndex, newLineIndex))); - if (normalizedTitle < prefix) { - lo = mid + newLineIndex - 1; - } else { - hi = mid; - } - } - return iterate(); - }); - } - }; - return iterate(); - } - - /** - * Functions and classes exposed by this module - */ - return { - SequentialTitleIterator : SequentialTitleIterator, - findPrefixOffset : findPrefixOffset, - MAX_TITLE_LENGTH : MAX_TITLE_LENGTH - }; -}); +/** + * titleIterators.js : Various classes to iterate over titles, for example as a + * result of searching. + * + * Copyright 2014 Evopedia developers + * License GPL v3: + * + * This file is part of Evopedia. + * + * Evopedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Evopedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Evopedia (file LICENSE-GPLv3.txt). If not, see + */ +define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) { + // Maximum length of a title + // 300 bytes is arbitrary : we actually do not really know how long the titles will be + // But mediawiki titles seem to be limited to ~200 bytes, so 300 should be more than enough + var MAX_TITLE_LENGTH = 300; + + /** + * Iterates over all titles starting at the given offset. + * The asynchronous method advance has to be called before this.title is + * valid. + * @param archive + * @param offset + */ + function SequentialTitleIterator(archive, offset) { + this._titleFile = archive._titleFile; + this._archive = archive; + this._offset = offset; + this._title = null; + }; + /** + * Advances to the next title (or the first), if possible. + * @returns jQuery promise containing the next title or null if there is no + * next title + */ + SequentialTitleIterator.prototype.advance = function() { + if (this._offset >= this._titleFile.size) { + this._title = null; + return jQuery.when(this._title); + } + var that = this; + return util.readFileSlice(this._titleFile, this._offset, + this._offset + MAX_TITLE_LENGTH).then(function(byteArray) { + var newLineIndex = 15; + while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { + newLineIndex++; + } + var encodedTitle = byteArray.subarray(0, newLineIndex); + that._title = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset); + that._offset += newLineIndex + 1; + return that._title; + }); + }; + + /** + * Searches for the offset into the given title file where the first title + * with the given prefix (or lexicographically larger) is located. + * The given function normalize is applied to every title before comparison. + * @param titleFile + * @param prefix + * @param normalize function to be applied to every title before comparison + * @returns jQuery promise giving the offset + */ + function findPrefixOffset(titleFile, prefix, normalize) { + prefix = normalize(prefix); + var lo = 0; + var hi = titleFile.size; + var iterate = function() { + if (lo >= hi - 1) { + if (lo > 0) + lo += 2; // Let lo point to the start of an entry + return jQuery.when(lo); + } else { + var mid = Math.floor((lo + hi) / 2); + return util.readFileSlice(titleFile, mid, mid + MAX_TITLE_LENGTH).then(function(byteArray) { + // Look for the index of the next NewLine + var newLineIndex = 0; + while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { + newLineIndex++; + } + var startIndex = 0; + if (mid > 0) { + startIndex = newLineIndex + 16; + newLineIndex = startIndex; + // Look for the index of the next NewLine + while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) { + newLineIndex++; + } + } + if (newLineIndex === startIndex) { + // End of file reached + hi = mid; + } else { + var normalizedTitle = normalize(utf8.parse(byteArray.subarray(startIndex, newLineIndex))); + if (normalizedTitle < prefix) { + lo = mid + newLineIndex - 1; + } else { + hi = mid; + } + } + return iterate(); + }); + } + }; + return iterate(); + } + + /** + * Functions and classes exposed by this module + */ + return { + SequentialTitleIterator : SequentialTitleIterator, + findPrefixOffset : findPrefixOffset, + MAX_TITLE_LENGTH : MAX_TITLE_LENGTH + }; +});