Random article feature : start reading titles after a newLine. Fixes #69

This commit is contained in:
mossroy 2014-03-06 17:49:41 +01:00
parent c5dd935f3d
commit bd2a224a96
2 changed files with 1040 additions and 1040 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,127 +1,127 @@
/** /**
* titleIterators.js : Various classes to iterate over titles, for example as a * titleIterators.js : Various classes to iterate over titles, for example as a
* result of searching. * result of searching.
* *
* Copyright 2014 Evopedia developers * Copyright 2014 Evopedia developers
* License GPL v3: * License GPL v3:
* *
* This file is part of Evopedia. * This file is part of Evopedia.
* *
* Evopedia is free software: you can redistribute it and/or modify * Evopedia is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or * the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version. * (at your option) any later version.
* *
* Evopedia is distributed in the hope that it will be useful, * Evopedia is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details. * GNU General Public License for more details.
* *
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/> * along with Evopedia (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
*/ */
define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) { define(['utf8', 'title', 'util', 'jquery'], function(utf8, evopediaTitle, util, jQuery) {
// Upper bound, in bytes, on how much of the title file is read for one entry.
// The value 300 is an arbitrary choice: the real maximum title length is unknown,
// but mediawiki titles seem to be capped at about 200 bytes, so 300 leaves a margin.
var MAX_TITLE_LENGTH = 300;
/**
 * Cursor that walks the titles of an archive's title file one after the
 * other, beginning at a given byte offset.
 * Construction performs no I/O: it only records where to read from and
 * initialises this.title to null. Titles are obtained by calling the
 * asynchronous method advance.
 * @param archive archive to iterate over; its titleFile property is the file that is read
 * @param offset byte offset in the title file at which the iteration starts
 */
function SequentialTitleIterator(archive, offset) {
    // File the entries are read from, and the archive passed along when parsing them
    this._titleFile = archive.titleFile;
    this._archive = archive;
    // Position (in bytes) of the next entry to read
    this._offset = offset;
    // No title has been read yet
    this.title = null;
}
/**
 * Advances to the next title (or the first), if possible.
 * On success, the parsed title is stored in this.title, the internal offset
 * is moved just past the entry that was read, and the title is the value of
 * the returned promise. When the offset is at or beyond the end of the title
 * file, this.title is set to null and the promise gives null.
 * @returns jQuery promise containing the next title or null if there is no
 * next title
 */
SequentialTitleIterator.prototype.advance = function() {
    if (this._offset >= this._titleFile.size) {
        // Nothing left to read
        this.title = null;
        return jQuery.when(this.title);
    }
    var that = this;
    return util.readFileSlice(this._titleFile, this._offset,
            this._offset + MAX_TITLE_LENGTH).then(function(byteArray) {
        // The scan for the terminating NewLine (byte 10) starts at index 15, so
        // the first 15 bytes of the entry are never tested.
        // NOTE(review): presumably these are a fixed-size binary header that
        // may legitimately contain the value 10 - confirm against the title file format.
        var newLineIndex = 15;
        while (newLineIndex < byteArray.length && byteArray[newLineIndex] !== 10) {
            newLineIndex++;
        }
        // If no NewLine is found in the slice, newLineIndex ends up equal to
        // byteArray.length and the whole slice is handed to the parser.
        var encodedTitle = byteArray.subarray(0, newLineIndex);
        var parsedTitle = evopediaTitle.Title.parseTitle(encodedTitle, that._archive, that._offset);
        // Publish the result on the documented public property. Previously only
        // the private _title was written here, so this.title stayed null after
        // a successful advance.
        that.title = parsedTitle;
        // Kept as an alias for any code that still reads the old private field.
        // It is deliberately not reset in the end-of-file branch, as before.
        that._title = parsedTitle;
        // Skip the entry and its NewLine terminator
        that._offset += newLineIndex + 1;
        return parsedTitle;
    });
};
/**
 * Binary search, over byte offsets of the given title file, for the place
 * where the first title that starts with the given prefix (or that is
 * lexicographically larger) is located.
 * Both the prefix and every title read from the file go through the given
 * function normalize before they are compared.
 * @param titleFile file to search in; its size property bounds the search
 * @param prefix prefix to look for
 * @param normalize function to be applied to every title before comparison
 * @returns jQuery promise giving the offset
 */
function findPrefixOffset(titleFile, prefix, normalize) {
    var NEW_LINE = 10;
    var normalizedPrefix = normalize(prefix);
    var lower = 0;
    var upper = titleFile.size;

    // Index of the first NewLine byte at or after position from,
    // or bytes.length (or from itself, if already past the end) when there is none
    function indexOfNewLine(bytes, from) {
        var index = from;
        while (index < bytes.length && bytes[index] !== NEW_LINE) {
            index += 1;
        }
        return index;
    }

    // One step of the search: either finishes, or reads the entry that
    // follows the middle of the current interval and narrows the interval
    function step() {
        if (lower >= upper - 1) {
            // lower is kept one byte before the NewLine that ends a compared
            // title (see below), so adding 2 lands on the byte right after
            // that NewLine, i.e. the start of an entry
            if (lower > 0) {
                lower += 2;
            }
            return jQuery.when(lower);
        }

        var pivot = Math.floor((lower + upper) / 2);
        return util.readFileSlice(titleFile, pivot, pivot + MAX_TITLE_LENGTH).then(function(bytes) {
            var titleStart = 0;
            var titleEnd = indexOfNewLine(bytes, 0);
            if (pivot > 0) {
                // The slice does not start at the beginning of the file: move
                // past the first NewLine found and on to the following title.
                // NOTE(review): 16 is presumably 1 byte for the NewLine plus the
                // 15 leading bytes that advance also skips in every entry - confirm.
                titleStart = titleEnd + 16;
                titleEnd = indexOfNewLine(bytes, titleStart);
            }

            // An empty span is treated as "End of file reached"
            var hasTitle = titleEnd !== titleStart;
            if (hasTitle && normalize(utf8.parse(bytes.subarray(titleStart, titleEnd))) < normalizedPrefix) {
                // The title read is too small: continue in the upper half
                lower = pivot + titleEnd - 1;
            } else {
                // Nothing to compare, or title not smaller than the prefix:
                // continue in the lower half
                upper = pivot;
            }
            return step();
        });
    }

    return step();
}
/** /**
* Functions and classes exposed by this module * Functions and classes exposed by this module
*/ */
return { return {
SequentialTitleIterator : SequentialTitleIterator, SequentialTitleIterator : SequentialTitleIterator,
findPrefixOffset : findPrefixOffset, findPrefixOffset : findPrefixOffset,
MAX_TITLE_LENGTH : MAX_TITLE_LENGTH MAX_TITLE_LENGTH : MAX_TITLE_LENGTH
}; };
}); });