mirror of
https://github.com/kiwix/kiwix-js-pwa.git
synced 2025-09-09 12:19:46 -04:00
Port proposed caching changes from Kiwix JS
Former-commit-id: 727c78557a98457d622955b2c1b30a909f6e5a89 [formerly 3820e67fdd6c756ba92b0b4fdee9a0c6a5bc805b [formerly 427a6213b23abc46e869ed8d5f061691cd7cedad]] Former-commit-id: 0c9e50cac31aa043c2bb3d19c1907f3cbc819de7 Former-commit-id: 94d9599a6683a00eb486a46bf98edf797e985301
This commit is contained in:
parent
d797531593
commit
58944df5db
@ -1,5 +1,7 @@
|
||||
/**
|
||||
* filecache.js: Generic least-recently-used-cache used for reading file chunks.
|
||||
* filecache.js: Generic cache for small, frequently read file slices.
|
||||
* It discards cached blocks according to a least-recently-used algorithm.
|
||||
* It is used primarily for fast Directory Entry lookup, speeding up binary search.
|
||||
*
|
||||
* Copyright 2020 Mossroy, peter-x, jaifroid and contributors
|
||||
* License GPL v3:
|
||||
@ -35,28 +37,39 @@ define(['q'], function(Q) {
|
||||
const BLOCK_SIZE = 4096;
|
||||
|
||||
/**
|
||||
* Creates a new cache with max size limit
|
||||
* @param {Integer} limit The maximum number of blocks of BLOCK_SIZE to be cached
|
||||
* A Cache Entry
|
||||
* @typedef CacheEntry
|
||||
* @property {String} id The cache key (stored also in the entry)
|
||||
* @property {CacheEntry} prev The previous linked cache entry
|
||||
* @property {CacheEntry} next The next linked cache entry
|
||||
* @property {Uint8Array} value The cached data
|
||||
*/
|
||||
function LRUCache(limit) {
|
||||
console.log("Creating cache of size " + limit);
|
||||
this._limit = limit;
|
||||
this._size = 0;
|
||||
// Mapping from id to {value: , prev: , next: }
|
||||
this._entries = {};
|
||||
// linked list of entries
|
||||
this._first = null;
|
||||
this._last = null;
|
||||
|
||||
/**
|
||||
* A Block Cache employing a Least Recently Used caching strategy
|
||||
* @typedef BlockCache
|
||||
* @property {Integer} _limit The maximum number of entries in the cache
|
||||
* @property {Map} _entries A map to store the cache keys and data
|
||||
* @property {CacheEntry} _first The most recent entry in the cache
|
||||
* @property {CacheEntry} _last The least recently used entry in the cache
|
||||
*/
|
||||
|
||||
/**
|
||||
* Creates a new cache with max size limit of MAX_CACHE_SIZE blocks
|
||||
*/
|
||||
function LRUCache() {
|
||||
console.log('Creating cache of size ' + MAX_CACHE_SIZE + ' * ' + BLOCK_SIZE + ' bytes');
|
||||
this._limit = MAX_CACHE_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to retrieve an element by its id. If it is not present in the cache, returns undefined; if it is present,
|
||||
* then the value is returned and the entry is moved to the top of the cache
|
||||
* @param {String} id The block cache entry key
|
||||
* @param {String} key The block cache entry key (byte offset + '' + file.id)
|
||||
* @returns {Uint8Array|undefined} The requested cache data or undefined
|
||||
*/
|
||||
LRUCache.prototype.get = function(id) {
|
||||
var entry = this._entries[id];
|
||||
LRUCache.prototype.get = function (key) {
|
||||
var entry = this._entries.get(key);
|
||||
if (entry === undefined) {
|
||||
return entry;
|
||||
}
|
||||
@ -66,30 +79,31 @@ define(['q'], function(Q) {
|
||||
|
||||
/**
|
||||
* Stores a value in the cache by id and prunes the least recently used entry if the cache is larger than MAX_CACHE_SIZE
|
||||
* @param {String} id The key under which to store the value (consists of filename + file number)
|
||||
* @param {Uint16Array} value The value to store in the cache
|
||||
* @param {String} key The key under which to store the value (byte offset from start of ZIM archive + '' + file.id)
|
||||
* @param {Uint8Array} value The value to store in the cache
|
||||
*/
|
||||
LRUCache.prototype.store = function(id, value) {
|
||||
var entry = this._entries[id];
|
||||
LRUCache.prototype.store = function (key, value) {
|
||||
var entry = this.get(key);
|
||||
if (entry === undefined) {
|
||||
entry = this._entries[id] = {id: id, prev: null, next: null, value: value};
|
||||
entry = {
|
||||
id: key,
|
||||
prev: null,
|
||||
next: null,
|
||||
value: value
|
||||
};
|
||||
this._entries.set(key, entry);
|
||||
this.insertAtTop(entry);
|
||||
if (this._size >= this._limit) {
|
||||
if (this._entries.size >= this._limit) {
|
||||
var e = this._last;
|
||||
this.unlink(e);
|
||||
delete this._entries[e.id];
|
||||
} else {
|
||||
this._size++;
|
||||
this._entries.delete(e.id);
|
||||
}
|
||||
} else {
|
||||
entry.value = value;
|
||||
this.moveToTop(entry);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Delete a cache entry
|
||||
* @param {String} entry The entry to delete
|
||||
* @param {CacheEntry} entry The entry to delete
|
||||
*/
|
||||
LRUCache.prototype.unlink = function(entry) {
|
||||
if (entry.next === null) {
|
||||
@ -106,7 +120,7 @@ define(['q'], function(Q) {
|
||||
|
||||
/**
|
||||
* Insert a cache entry at the top of the cache
|
||||
* @param {String} entry The entry to insert
|
||||
* @param {CacheEntry} entry The entry to insert
|
||||
*/
|
||||
LRUCache.prototype.insertAtTop = function(entry) {
|
||||
if (this._first === null) {
|
||||
@ -120,27 +134,42 @@ define(['q'], function(Q) {
|
||||
|
||||
/**
|
||||
* Move a cache entry to the top of the cache
|
||||
* @param {String} entry The entry to move
|
||||
* @param {CacheEntry} entry The entry to move
|
||||
*/
|
||||
LRUCache.prototype.moveToTop = function(entry) {
|
||||
this.unlink(entry);
|
||||
this.insertAtTop(entry);
|
||||
};
|
||||
|
||||
// Create a new cache
|
||||
var cache = new LRUCache(MAX_CACHE_SIZE);
|
||||
/**
|
||||
|
||||
* A new Block Cache
|
||||
* @type {BlockCache}
|
||||
*/
|
||||
var cache = new LRUCache();
|
||||
|
||||
// Counters for reporting only
|
||||
var hits = 0;
|
||||
var misses = 0;
|
||||
|
||||
/**
|
||||
* Initializes or resets the cache - this should be called whenever a new ZIM is loaded
|
||||
*/
|
||||
var init = function () {
|
||||
console.log('Initialize or reset FileCache');
|
||||
cache._entries = new Map();
|
||||
// Initialize linked list of entries
|
||||
cache._first = null;
|
||||
cache._last = null;
|
||||
};
|
||||
/**
|
||||
* Read a certain byte range in the given file, breaking the range into chunks that go through the cache
|
||||
* If a read of more than blocksize (bytes) is requested, do not use the cache
|
||||
* If a read of more than BLOCK_SIZE * 2 (bytes) is requested, do not use the cache
|
||||
* @param {Object} file The requested ZIM archive to read from
|
||||
* @param {Integer} begin The byte from which to start reading
|
||||
* @param {Integer} end The byte at which to stop reading (end will not be read)
|
||||
* @return {Promise<Uint8Array>} A Promise that resolves to the correctly concatenated data from the split ZIM file set
|
||||
* @return {Promise<Uint8Array>} A Promise that resolves to the correctly concatenated data from the cache
|
||||
* or from the ZIM archive
|
||||
*/
|
||||
var read = function(file, begin, end) {
|
||||
// Read large chunks bypassing the block cache because we would have to
|
||||
@ -148,19 +177,21 @@ define(['q'], function(Q) {
|
||||
if (end - begin > BLOCK_SIZE * 2) return file._readSplitSlice(begin, end);
|
||||
var readRequests = [];
|
||||
var blocks = {};
|
||||
for (var i = Math.floor(begin / BLOCK_SIZE) * BLOCK_SIZE; i < end; i += BLOCK_SIZE) {
|
||||
var block = cache.get(file.name + i);
|
||||
// Look for the requested data in the blocks: we may need to stitch together data from two or more blocks
|
||||
for (var id = Math.floor(begin / BLOCK_SIZE) * BLOCK_SIZE; id < end; id += BLOCK_SIZE) {
|
||||
var block = cache.get(id + '' + file.id);
|
||||
if (block === undefined) {
|
||||
// Data not in cache, so read from archive
|
||||
misses++;
|
||||
readRequests.push(function(offset) {
|
||||
return file._readSplitSlice(offset, offset + BLOCK_SIZE).then(function(result) {
|
||||
cache.store(file.name + offset, result);
|
||||
cache.store(offset + '' + file.id, result);
|
||||
blocks[offset] = result;
|
||||
});
|
||||
}(i));
|
||||
}(id));
|
||||
} else {
|
||||
hits++;
|
||||
blocks[i] = block;
|
||||
blocks[id] = block;
|
||||
}
|
||||
}
|
||||
if (misses + hits > 2000) {
|
||||
@ -168,9 +199,11 @@ define(['q'], function(Q) {
|
||||
hits = 0;
|
||||
misses = 0;
|
||||
}
|
||||
// Wait for all the blocks to be read either from the cache or from the archive
|
||||
return Q.all(readRequests).then(function() {
|
||||
var result = new Uint8Array(end - begin);
|
||||
var pos = 0;
|
||||
// Stitch together the data parts in the right order
|
||||
for (var i = Math.floor(begin / BLOCK_SIZE) * BLOCK_SIZE; i < end; i += BLOCK_SIZE) {
|
||||
var b = Math.max(i, begin) - i;
|
||||
var e = Math.min(end, i + BLOCK_SIZE) - i;
|
||||
@ -182,6 +215,7 @@ define(['q'], function(Q) {
|
||||
};
|
||||
|
||||
return {
|
||||
read: read
|
||||
read: read,
|
||||
init: init
|
||||
};
|
||||
});
|
@ -22,6 +22,13 @@
|
||||
'use strict';
|
||||
define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry', 'filecache'], function(xz, zstd, util, utf8, Q, zimDirEntry, FileCache) {
|
||||
|
||||
/**
|
||||
* A variable to keep track of the currently loaded ZIM archive, e.g., for labelling cache entries
|
||||
* The ID is temporary and is reset to 0 at each session start; it is incremented by 1 each time a new ZIM is loaded
|
||||
* @type {Integer}
|
||||
*/
|
||||
var tempFileId = 0;
|
||||
|
||||
var readInt = function (data, offset, size) {
|
||||
var r = 0;
|
||||
for (var i = 0; i < size; i++) {
|
||||
@ -38,15 +45,15 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry',
|
||||
*
|
||||
* @typedef ZIMFile
|
||||
* @property {Array<File>} _files Array of ZIM files
|
||||
* @property {String} name Abstract name of ZIM file set
|
||||
* @property {Integer} articleCount total number of articles
|
||||
* @property {Integer} clusterCount total number of clusters
|
||||
* @property {Integer} urlPtrPos position of the directory pointerlist ordered by URL
|
||||
* @property {Integer} titlePtrPos position of the directory pointerlist ordered by title
|
||||
* @property {Integer} clusterPtrPos position of the cluster pointer list
|
||||
* @property {Integer} mimeListPos position of the MIME type list (also header size)
|
||||
* @property {Integer} mainPage main page or 0xffffffff if no main page
|
||||
* @property {Integer} layoutPage layout page or 0xffffffffff if no layout page
|
||||
* @property {Integer} id Arbitrary numeric ZIM id used to track the currently loaded archive
|
||||
* @property {Integer} articleCount Total number of articles
|
||||
* @property {Integer} clusterCount Total number of clusters
|
||||
* @property {Integer} urlPtrPos Position of the directory pointerlist ordered by URL
|
||||
* @property {Integer} titlePtrPos Position of the directory pointerlist ordered by title
|
||||
* @property {Integer} clusterPtrPos Position of the cluster pointer list
|
||||
* @property {Integer} mimeListPos Position of the MIME type list (also header size)
|
||||
* @property {Integer} mainPage Main page or 0xffffffff if no main page
|
||||
* @property {Integer} layoutPage Layout page or 0xffffffff if no layout page
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -70,7 +77,7 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry',
|
||||
};
|
||||
|
||||
/**
|
||||
* Read a slice from the ZIM set starting at offset for size of bytes
|
||||
* Read a slice from the FileCache or ZIM set, starting at offset for size of bytes
|
||||
* @param {Integer} offset The absolute offset from the start of the ZIM file or file set at which to start reading
|
||||
* @param {Integer} size The number of bytes to read
|
||||
* @returns {Promise<Uint8Array>} A Promise for a Uint8Array containing the requested data
|
||||
@ -105,7 +112,6 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry',
|
||||
return readRequests[0];
|
||||
} else {
|
||||
// Wait until all are resolved and concatenate.
|
||||
console.log("CONCAT");
|
||||
return Q.all(readRequests).then(function(arrays) {
|
||||
var concatenated = new Uint8Array(end - begin);
|
||||
var offset = 0;
|
||||
@ -119,7 +125,7 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry',
|
||||
};
|
||||
|
||||
/**
|
||||
* Read and parse a a Directory Entry at the given archive offset
|
||||
* Read and parse a Directory Entry at the given archive offset
|
||||
* @param {Integer} offset The offset at which the DirEntry is located
|
||||
* @returns {Promise<DirEntry>} A Promise for the requested DirEntry
|
||||
*/
|
||||
@ -279,9 +285,8 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry',
|
||||
var urlPtrPos = readInt(header, 32, 8);
|
||||
return readMimetypeMap(fileArray[0], mimeListPos, urlPtrPos).then(function (data) {
|
||||
var zf = new ZIMFile(fileArray);
|
||||
// Line below provides an abstracted filename in case the ZIM file is split into multiple parts;
|
||||
// it greatly simplifies coding of the block cache, as it can store and respond to offsets from the start of the file set
|
||||
zf.name = fileArray[0].name.replace(/(\.zim)\w\w$/i, '$1');
|
||||
// Line below provides a temporary, per-session numeric ZIM ID used in filecache.js
|
||||
zf.id = tempFileId++;
|
||||
zf.articleCount = readInt(header, 24, 4);
|
||||
zf.clusterCount = readInt(header, 28, 4);
|
||||
zf.urlPtrPos = urlPtrPos;
|
||||
@ -291,6 +296,8 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'q', 'zimDirEntry',
|
||||
zf.mainPage = readInt(header, 64, 4);
|
||||
zf.layoutPage = readInt(header, 68, 4);
|
||||
zf.mimeTypes = data;
|
||||
// Initialize or reset the FileCache
|
||||
FileCache.init();
|
||||
return zf;
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user