Implement reading the metadata.txt file, to find its language and date

Use this language to redirect the links to images (File:xxx.jpg for example) to the online version. Fixes #23
This commit is contained in:
mossroy 2013-07-15 13:36:27 +02:00
parent 05014e469b
commit 9c470ea779
5 changed files with 183 additions and 124 deletions

View File

@ -1,106 +1,110 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Evopedia</title>
<meta name="description" content="Offline wikipedia reader">
<meta name="viewport" content="width=device-width">
<!--
Port of Evopedia (offline wikipedia reader) in HTML5/Javascript, with Firefox OS as the primary target
The original application is at http://www.evopedia.info/
It uses wikipedia dumps located at http://dumpathome.evopedia.info/dumps/finished
Author : Mossroy - mossroy@free.fr
Math image algorithm contributed by christian@evopedia.info
License:
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public
License along with this program; if not, see
<http://www.gnu.org/licenses/>.
-->
<link rel="stylesheet" href="css/mediawiki-main.css">
<link rel="stylesheet" href="css/app.css">
</head>
<body>
<h1>Evopedia</h1>
<input type="button" id="showHideAbout" value="About" />
<div id="about" style="display: none;">
This is a preliminary work on the port of Evopedia (offline wikipedia reader) in HTML5/Javascript, with Firefox OS as the primary target
<br />
The original application is at <a href="http://www.evopedia.info/">http://www.evopedia.info/</a>
<br />
<br />
To use it, you have to first download locally a dump from <a href="http://dumpathome.evopedia.info/dumps/finished">http://dumpathome.evopedia.info/dumps/finished</a> (with a Bittorrent client).
<br />
<ul>
<li>On desktops, it works on recent Firefox and Chrome, and maybe on other browsers. In this case, you have to select manually some files from your dump (see below)</li>
<li>On the Firefos OS simulator, you have to put the archive files in a "fake-sdcard" folder of your firefox profile (ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/profile/fake-sdcard). It looks for file titles.idx in it. You need to install the application from the dashboard of the simulator (due to security restrictions in Firefox OS : only certified webapps can access the sdcard)</li>
<li>On a real Firefox OS device, you simply have to put the archive files anywhere in your sdcard, so that it finds titles.idx on it. For now, the application has to be installed manually (use the push feature of the Firefos OS Simulator)</li>
</ul>
<br />
It's still a beta version : there are many many ways this could be enhanced (suggestions and patches are welcome : the source code is on <a href="https://github.com/mossroy/evopedia-html5">github</a>). In particular :
<ul>
<li>The performance has to be optimized when searching titles and reading an article</li>
<li>Some searches (for example with prefix "a" on the French dump) do not give any result even if they should</li>
<li>In some cases, the links inside an article do not work, or do not lead to the right article</li>
<li>It is not easy to use on the Peak device from Geeksphone because the buttons and inputs are too small</li>
<li>Some features of the original application still have to be implemented : in particular geolocation of articles</li>
</ul>
The <a href="https://github.com/mossroy/evopedia-html5/issues">bugtracker</a> is on github too.
</div>
<div id="openLocalFiles" style="display: none;">
<br /> Please select the file titles.idx :<br /> <input type="file"
id="titleFile" /><br /> Please select the files wikipedia_*.dat
from the same dump :<br /> <input type="file" id="dataFiles" multiple />
<br /> Please select the file math.idx from the same dump:<br /> <input type="file"
id="mathIndexFile" /><br /> Please select the file math.dat from the
same dump:<br /> <input type="file" id="mathDataFile" /><br />
</div>
<div id="scanningForArchives" style="display: none;">
<br /> Scanning your sdcard for archives... Please wait <img src="img/spinner.gif" />
</div>
<div id="chooseArchiveFromLocalStorage" style="display: none;">
<br /> Please select the archive you want to use : <select id="archiveList"></select>
</div>
<br /> Find titles starting with :
<input type="text" id="prefix" value="" />&nbsp;
<input type="button" id="searchTitles" value="Search titles" />
<img id="searchingForTitles" src="img/spinner.gif" style="display: none;"/>
<br /><br /> Choose a title from the filtered list :
<select id="titleList"></select>
<br />
<br />
<input type="button" id="readData" value="Read article from dump" />
<div id="readingArticle" style="display: none;">
Reading article <span id="articleName"></span> from dump... Please wait <img src="img/spinner.gif" />
</div>
<div id="articleContent">&nbsp;</div>
<br />
<div id="navigationButtons">
<input type="button" id="btnBack" value="Back"/>
<input type="button" id="btnForward" value="Forward"/>
</div>
<!-- Using require.js, a module system for javascript, include the
js files. This loads "main.js", which in turn can load other
files, all handled by require.js:
http://requirejs.org/docs/api.html#jsfiles -->
<script type="text/javascript"
data-main="js/init.js"
src="js/lib/require.js"></script>
</body>
</html>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Evopedia</title>
<meta name="description" content="Offline wikipedia reader">
<meta name="viewport" content="width=device-width">
<!--
Port of Evopedia (offline wikipedia reader) in HTML5/Javascript, with Firefox OS as the primary target
The original application is at http://www.evopedia.info/
It uses wikipedia dumps located at http://dumpathome.evopedia.info/dumps/finished
Author : Mossroy - mossroy@free.fr
Math image algorithm contributed by christian@evopedia.info
License:
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public
License along with this program; if not, see
<http://www.gnu.org/licenses/>.
-->
<link rel="stylesheet" href="css/mediawiki-main.css">
<link rel="stylesheet" href="css/app.css">
</head>
<body>
<h1>Evopedia</h1>
<input type="button" id="showHideAbout" value="About" />
<div id="about" style="display: none;">
This is a preliminary work on the port of Evopedia (offline wikipedia reader) in HTML5/Javascript, with Firefox OS as the primary target
<br />
The original application is at <a href="http://www.evopedia.info/">http://www.evopedia.info/</a>
<br />
<br />
To use it, you have to first download locally a dump from <a href="http://dumpathome.evopedia.info/dumps/finished">http://dumpathome.evopedia.info/dumps/finished</a> (with a Bittorrent client).
<br />
<ul>
<li>On desktops, it works on recent Firefox and Chrome, and maybe on other browsers. In this case, you have to select manually some files from your dump (see below)</li>
<li>On the Firefox OS simulator, you have to put the archive files in a "fake-sdcard" folder of your Firefox profile (ex : ~/.mozilla/firefox/xxxx.default/extensions/r2d2b2g@mozilla.org/profile/fake-sdcard). It looks for file titles.idx in it. You need to install the application from the dashboard of the simulator (due to security restrictions in Firefox OS : only certified webapps can access the sdcard)</li>
<li>On a real Firefox OS device, you simply have to put the archive files anywhere in your sdcard, so that it finds titles.idx on it. For now, the application has to be installed manually (use the push feature of the Firefox OS Simulator)</li>
</ul>
<br />
It's still a beta version : there are many many ways this could be enhanced (suggestions and patches are welcome : the source code is on <a href="https://github.com/mossroy/evopedia-html5">github</a>). In particular :
<ul>
<li>The performance has to be optimized when searching titles and reading an article</li>
<li>Some searches (for example with prefix "a" on the French dump) do not give any result even if they should</li>
<li>In some cases, the links inside an article do not work, or do not lead to the right article</li>
<li>It is not easy to use on the Peak device from Geeksphone because the buttons and inputs are too small</li>
<li>Some features of the original application still have to be implemented : in particular geolocation of articles</li>
</ul>
The <a href="https://github.com/mossroy/evopedia-html5/issues">bugtracker</a> is on github too.
</div>
<!-- Manual file selection (desktop browsers): one file input per archive file.
     Each prompt is a <label> bound to its input so that assistive technologies
     announce it and clicking the text opens the file picker. -->
<div id="openLocalFiles" style="display: none;">
  <br /> <label for="titleFile">Please select the file titles.idx :</label><br />
  <input type="file" id="titleFile" /><br />
  <label for="dataFiles">Please select the files wikipedia_*.dat from the same dump :</label><br />
  <input type="file" id="dataFiles" multiple /><br />
  <label for="mathIndexFile">Please select the file math.idx from the same dump:</label><br />
  <input type="file" id="mathIndexFile" /><br />
  <label for="mathDataFile">Please select the file math.dat from the same dump:</label><br />
  <input type="file" id="mathDataFile" /><br />
  <label for="metadataFile">Please select the file metadata.txt from the same dump:</label><br />
  <input type="file" id="metadataFile" /><br />
</div>
<!-- Progress message for the sdcard scan (hidden by default). The spinner is
     decorative because the adjacent text already conveys the state, hence
     the empty alt. -->
<div id="scanningForArchives" style="display: none;">
  <br /> Scanning your sdcard for archives... Please wait <img src="img/spinner.gif" alt="" />
</div>
<!-- Archive chooser (hidden by default). The prompt is a <label> bound to
     the select so the control has an accessible name. -->
<div id="chooseArchiveFromLocalStorage" style="display: none;">
  <br /> <label for="archiveList">Please select the archive you want to use :</label> <select id="archiveList"></select>
</div>
<!-- Title search controls. Prompts are <label>s bound to their controls.
     The spinner has no adjacent text, so it carries a descriptive alt. -->
<br /> <label for="prefix">Find titles starting with :</label>
<input type="text" id="prefix" value="" />&nbsp;
<input type="button" id="searchTitles" value="Search titles" />
<img id="searchingForTitles" src="img/spinner.gif" alt="Searching for titles..." style="display: none;"/>
<br /><br /> <label for="titleList">Choose a title from the filtered list :</label>
<select id="titleList"></select>
<br />
<br />
<input type="button" id="readData" value="Read article from dump" />
<!-- Progress message for article loading (hidden by default). The spinner
     is decorative because the adjacent text already conveys the state. -->
<div id="readingArticle" style="display: none;">
  Reading article <span id="articleName"></span> from dump... Please wait <img src="img/spinner.gif" alt="" />
</div>
<div id="articleContent">&nbsp;</div>
<br />
<div id="navigationButtons">
<input type="button" id="btnBack" value="Back"/>
<input type="button" id="btnForward" value="Forward"/>
</div>
<!-- Load require.js, a module system for JavaScript. The data-main
attribute below names "js/init.js" as the entry script; per the require.js
documentation, that script is loaded by require.js and can in turn load
other files, all handled by require.js:
http://requirejs.org/docs/api.html#jsfiles -->
<script type="text/javascript"
data-main="js/init.js"
src="js/lib/require.js"></script>
</body>
</html>

View File

@ -89,9 +89,10 @@ define(function(require) {
function setLocalArchiveFromArchiveList() {
var archiveDirectory = $('#archiveList').val();
localArchive = new evopedia.LocalArchive();
localArchive.readTitleFile(storage, archiveDirectory);
localArchive.readDataFiles(storage, archiveDirectory, 0);
localArchive.readMathFiles(storage, archiveDirectory);
localArchive.readTitleFileFromStorage(storage, archiveDirectory);
localArchive.readDataFilesFromStorage(storage, archiveDirectory, 0);
localArchive.readMathFilesFromStorage(storage, archiveDirectory);
localArchive.readMetadataFileFromStorage(storage, archiveDirectory);
}
/**
@ -103,6 +104,7 @@ define(function(require) {
$('#titleFile').on('change', setLocalArchiveFromFileSelect);
$('#mathIndexFile').on('change', setLocalArchiveFromFileSelect);
$('#mathDataFile').on('change', setLocalArchiveFromFileSelect);
$('#metadataFile').on('change', setLocalArchiveFromFileSelect);
}
/**
@ -113,11 +115,13 @@ define(function(require) {
var titleFile = document.getElementById('titleFile').files[0];
var mathIndexFile = document.getElementById('mathIndexFile').files[0];
var mathDataFile = document.getElementById('mathDataFile').files[0];
var metadataFile = document.getElementById('metadataFile').files[0];
localArchive = new evopedia.LocalArchive();
localArchive.dataFiles = dataFiles;
localArchive.titleFile = titleFile;
localArchive.mathIndexFile = mathIndexFile;
localArchive.mathDataFile = mathDataFile;
localArchive.readMetadataFile(metadataFile);
}
/**
@ -205,11 +209,15 @@ define(function(require) {
// Display the article inside the web page.
$('#articleContent').html(htmlArticle);
// Compile the regular expressions needed to modify links
var regexOtherLanguage = /^\.?\/?\.\.\/([^\/]+)\/(.*)/;
var regexImageLink = /^.?\/?[^:]+:(.*)/;
// Convert links into javascript calls
var regex = /^\.?\/?\.\.\/([^\/]+)\/(.*)/;
$('#articleContent').find('a').each(function() {
// Store current link's url
var url = $(this).attr("href");
var lowerCaseUrl = url.toLowerCase();
var cssClass = $(this).attr("class");
if (cssClass === "new") {
@ -225,10 +233,19 @@ define(function(require) {
else if (url.substring(0, 4) === "http") {
// It's an external link : do nothing
}
else if (url.substring(0, 2) === ".." || url.substring(0, 4) === "./..") {
else if (url.match(regexOtherLanguage)) {
// It's a link to another language : change the URL to the online version of wikipedia
// The regular expression extracts $1 as the language, and $2 as the title name
var onlineWikipediaUrl = url.replace(regex, "https://$1.wikipedia.org/wiki/$2");
var onlineWikipediaUrl = url.replace(regexOtherLanguage, "https://$1.wikipedia.org/wiki/$2");
$(this).attr("href", onlineWikipediaUrl);
}
else if (url.match(regexImageLink)
&& (evopedia.endsWith(lowerCaseUrl, ".png")
|| evopedia.endsWith(lowerCaseUrl, ".svg")
|| evopedia.endsWith(lowerCaseUrl, ".jpg")
|| evopedia.endsWith(lowerCaseUrl, ".jpeg"))) {
// It's a link to a file of wikipedia : change the URL to the online version
var onlineWikipediaUrl = url.replace(regexImageLink, "https://"+localArchive.language+".wikipedia.org/wiki/File:$1");
$(this).attr("href", onlineWikipediaUrl);
}
else {

View File

@ -72,11 +72,9 @@ define(function(require) {
this.titleFile = null;
this.mathIndexFile = null;
this.mathDataFile = null;
// TODO to be replaced by the real archive attributes
this.date = "2013-03-14";
this.language = "zz";
}
;
this.date = null;
this.language = null;
};
/**
@ -86,7 +84,7 @@ define(function(require) {
* @param storage
* @param directory
*/
LocalArchive.prototype.readTitleFile = function(storage, directory) {
LocalArchive.prototype.readTitleFileFromStorage = function(storage, directory) {
var currentLocalArchiveInstance = this;
var filerequest = storage.get(directory + '/titles.idx');
filerequest.onsuccess = function() {
@ -105,7 +103,7 @@ define(function(require) {
* @param directory
* @param index
*/
LocalArchive.prototype.readDataFiles = function(storage, directory, index) {
LocalArchive.prototype.readDataFilesFromStorage = function(storage, directory, index) {
var currentLocalArchiveInstance = this;
var prefixedFileNumber = "";
@ -118,7 +116,7 @@ define(function(require) {
+ '.dat');
filerequest.onsuccess = function() {
currentLocalArchiveInstance.dataFiles[index] = filerequest.result;
currentLocalArchiveInstance.readDataFiles(storage, directory,
currentLocalArchiveInstance.readDataFilesFromStorage(storage, directory,
index + 1);
};
filerequest.onerror = function(event) {
@ -129,6 +127,43 @@ define(function(require) {
}
};
};
/**
 * Request metadata.txt from the given directory of the storage and, once the
 * request succeeds, pass the resulting file to readMetadataFile so that its
 * values end up in the current instance. A failed request is reported with
 * an alert.
 *
 * @param storage object whose get(path) returns a request with
 *                onsuccess/onerror callbacks
 * @param directory directory that contains metadata.txt
 */
LocalArchive.prototype.readMetadataFileFromStorage = function(storage, directory) {
    var self = this;
    var metadataPath = directory + '/metadata.txt';
    var request = storage.get(metadataPath);

    request.onerror = function(event) {
        var errorName = event.target.error.name;
        alert("error reading metadata.txt file in directory "
            + directory + " : " + errorName);
    };

    request.onsuccess = function() {
        // The request result is the file itself: delegate the parsing
        self.readMetadataFile(request.result);
    };
};
/**
 * Read the metadata file, in order to populate its values (language and
 * date) in the current instance.
 *
 * The file is read asynchronously as text: the instance attributes are only
 * set once the FileReader has finished loading.
 * Each value is looked for on a line of the form "key = value" (the spaces
 * around "=" are optional). A value that cannot be found leaves the
 * corresponding attribute untouched and is reported with an alert.
 *
 * @param {File} file metadata.txt file. If it is missing (for example
 *        because the user has not selected it yet), nothing is done.
 */
LocalArchive.prototype.readMetadataFile = function(file) {
    var currentLocalArchiveInstance = this;
    if (!file) {
        // FileReader.readAsText throws on a non-Blob argument : this happens
        // when the other file inputs change before metadata.txt is selected
        return;
    }
    var reader = new FileReader();
    reader.onload = function(e) {
        var metadata = e.target.result;
        // (?:^|\n) also accepts a key located on the very first line.
        // \r is excluded from the value, so that files with CRLF line
        // endings do not leave a trailing carriage return in it.
        var languageMatch = /(?:^|\n)language ?= ?([^ \r\n]+)/.exec(metadata);
        var dateMatch = /(?:^|\n)date ?= ?([^ \r\n]+)/.exec(metadata);
        // exec returns null when there is no match : check before indexing
        if (languageMatch) {
            currentLocalArchiveInstance.language = languageMatch[1];
        }
        else {
            alert("error reading metadata.txt file : language not found");
        }
        if (dateMatch) {
            currentLocalArchiveInstance.date = dateMatch[1];
        }
        else {
            alert("error reading metadata.txt file : date not found");
        }
    };
    reader.onerror = function() {
        var errorName = reader.error ? reader.error.name : "unknown error";
        alert("error reading metadata.txt file : " + errorName);
    };
    reader.readAsText(file);
};
/**
* Read the math files (math.idx and math.dat) in the given directory, and assign it to the
@ -137,7 +172,7 @@ define(function(require) {
* @param storage
* @param directory
*/
LocalArchive.prototype.readMathFiles = function(storage, directory) {
LocalArchive.prototype.readMathFilesFromStorage = function(storage, directory) {
var currentLocalArchiveInstance = this;
var filerequest1 = storage.get(directory + '/math.idx');
filerequest1.onsuccess = function() {
@ -724,6 +759,7 @@ define(function(require) {
*/
return {
LocalArchive: LocalArchive,
Title: Title
Title: Title,
endsWith : endsWith
};
});

View File

@ -26,6 +26,8 @@
<input type="file" id="mathIndexFile" />
<br /> Please select the file math.dat from the same dump:<br />
<input type="file" id="mathDataFile" />
<br /> Please select the file metadata.txt from the same dump:<br />
<input type="file" id="metadataFile" /><br />
<br />
<input type="button" id="runTests" value="Run tests" />
<div id="qunit"></div>

View File

@ -34,8 +34,8 @@ define(function(require) {
localArchive.dataFiles = document.getElementById('dataFiles').files;
localArchive.mathIndexFile = document.getElementById('mathIndexFile').files[0];
localArchive.mathDataFile = document.getElementById('mathDataFile').files[0];
localArchive.language = "small";
localArchive.date = "2010-08-14";
var metadataFile = document.getElementById('metadataFile').files[0];
localArchive.readMetadataFile(metadataFile);
module("evopedia");
asyncTest("check getTitlesStartingAtOffset 0", function() {