mirror of
https://github.com/kiwix/kiwix-js-pwa.git
synced 2025-08-11 15:28:34 -04:00

Former-commit-id: 727c78557a98457d622955b2c1b30a909f6e5a89 [formerly 3820e67fdd6c756ba92b0b4fdee9a0c6a5bc805b [formerly 427a6213b23abc46e869ed8d5f061691cd7cedad]] Former-commit-id: 0c9e50cac31aa043c2bb3d19c1907f3cbc819de7 Former-commit-id: 94d9599a6683a00eb486a46bf98edf797e985301
221 lines
8.2 KiB
JavaScript
221 lines
8.2 KiB
JavaScript
/**
|
|
* filecache.js: Generic cache for small, frequently read file slices.
|
|
* It discards cached blocks according to a least-recently-used algorithm.
|
|
* It is used primarily for fast Directory Entry lookup, speeding up binary search.
|
|
*
|
|
* Copyright 2020 Mossroy, peter-x, jaifroid and contributors
|
|
* License GPL v3:
|
|
*
|
|
* This file is part of Kiwix.
|
|
*
|
|
* Kiwix JS is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Kiwix JS is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Kiwix JS (file LICENSE). If not, see <http://www.gnu.org/licenses/>
|
|
*/
|
|
'use strict';
|
|
define(['q'], function(Q) {
|
|
/**
 * Maximum number of cache blocks of BLOCK_SIZE bytes each.
 * Maximum size of the cache in bytes = MAX_CACHE_SIZE * BLOCK_SIZE
 * @type {number}
 */
const MAX_CACHE_SIZE = 4000;

/**
 * The maximum blocksize to read or store via the block cache (bytes)
 * @type {number}
 */
const BLOCK_SIZE = 4096;
|
|
|
|
/**
|
|
* A Cache Entry
|
|
* @typedef CacheEntry
|
|
* @property {String} id The cache key (stored also in the entry)
|
|
* @property {CacheEntry} prev The previous linked cache entry
|
|
* @property {CacheEntry} next The next linked cache entry
|
|
* @property {Uint8Array} value The cached data
|
|
*/
|
|
|
|
/**
|
|
* A Block Cache employing a Least Recently Used caching strategy
|
|
* @typedef BlockCache
|
|
* @property {Integer} _limit The maximum number of entries in the cache
|
|
* @property {Map} _entries A map to store the cache keys and data
|
|
* @property {CacheEntry} _first The most recent entry in the cache
|
|
* @property {CacheEntry} _last The least recently used entry in the cache
|
|
*/
|
|
|
|
/**
 * Creates a new, empty cache using a Least Recently Used eviction strategy.
 * Entries are held in a Map (lookup by key) and in a doubly linked list that runs
 * from the most recently used entry (_first) to the least recently used one (_last).
 * @param {number} [limit] The maximum number of entries to keep (defaults to MAX_CACHE_SIZE blocks)
 */
function LRUCache(limit) {
    this._limit = typeof limit === 'number' && limit > 0 ? limit : MAX_CACHE_SIZE;
    console.log('Creating cache of size ' + this._limit + ' * ' + BLOCK_SIZE + ' bytes');
    // Start out empty so that the cache is usable even if it has not been reset from outside yet
    this._entries = new Map();
    this._first = null;
    this._last = null;
}

/**
 * Tries to retrieve an element by its id. If it is not present in the cache, returns undefined; if it is present,
 * then the value is returned and the entry is moved to the top of the cache
 * @param {String} key The block cache entry key (byte offset + '' + file.id)
 * @returns {Uint8Array|undefined} The requested cache data or undefined
 */
LRUCache.prototype.get = function (key) {
    var entry = this._entries.get(key);
    if (entry === undefined) {
        return undefined;
    }
    this.moveToTop(entry);
    return entry.value;
};

/**
 * Stores a value in the cache by id and prunes the least recently used entry if the cache
 * has grown larger than its limit. Storing under an existing key replaces the cached value
 * and marks the entry as most recently used.
 * @param {String} key The key under which to store the value (byte offset + '' + file.id)
 * @param {Uint8Array} value The value to store in the cache
 */
LRUCache.prototype.store = function (key, value) {
    // Look the entry up directly: get() returns the value, which cannot tell "missing" from "stored undefined"
    var entry = this._entries.get(key);
    if (entry !== undefined) {
        entry.value = value;
        this.moveToTop(entry);
        return;
    }
    entry = {
        id: key,
        prev: null,
        next: null,
        value: value
    };
    this._entries.set(key, entry);
    this.insertAtTop(entry);
    // Only evict once the limit is exceeded, so that the cache can hold exactly _limit entries
    if (this._entries.size > this._limit) {
        var oldest = this._last;
        this.unlink(oldest);
        this._entries.delete(oldest.id);
    }
};

/**
 * Detaches a cache entry from the linked list (the entry is not removed from the key map)
 * @param {CacheEntry} entry The entry to detach
 */
LRUCache.prototype.unlink = function (entry) {
    if (entry.next !== null) {
        entry.next.prev = entry.prev;
    } else if (this._last === entry) {
        this._last = entry.prev;
    }
    if (entry.prev !== null) {
        entry.prev.next = entry.next;
    } else if (this._first === entry) {
        // The head is being removed: its successor (possibly null) becomes the new head
        this._first = entry.next;
    }
    // Clear the links so that a later re-insert cannot pick up stale neighbours
    entry.prev = null;
    entry.next = null;
};

/**
 * Insert a (detached) cache entry at the top of the cache
 * @param {CacheEntry} entry The entry to insert
 */
LRUCache.prototype.insertAtTop = function (entry) {
    entry.prev = null;
    entry.next = this._first;
    if (this._first === null) {
        // The list was empty, so the entry is both the most and the least recently used one
        this._last = entry;
    } else {
        this._first.prev = entry;
    }
    this._first = entry;
};

/**
 * Move a cache entry to the top of the cache
 * @param {CacheEntry} entry The entry to move
 */
LRUCache.prototype.moveToTop = function (entry) {
    if (this._first === entry) {
        return;
    }
    this.unlink(entry);
    this.insertAtTop(entry);
};
|
|
|
|
/**
 * A new Block Cache
 * @type {BlockCache}
 */
var cache = new LRUCache();

// Counters for reporting only
var hits = 0;
var misses = 0;
|
|
|
|
/**
 * Initializes or resets the cache - this should be called whenever a new ZIM is loaded.
 * Any previously cached blocks are dropped by replacing the key map and emptying the linked list.
 */
var init = function () {
    console.log('Initialize or reset FileCache');
    // A fresh Map discards all cached entries in one go
    cache._entries = new Map();
    // Initialize linked list of entries
    cache._first = null;
    cache._last = null;
};
|
|
/**
 * Read a certain byte range in the given file, breaking the range into chunks that go through the cache.
 * If a read of more than BLOCK_SIZE * 2 (bytes) is requested, the cache is not used.
 * @param {Object} file The requested ZIM archive to read from (its id and _readSplitSlice method are used)
 * @param {Integer} begin The byte from which to start reading
 * @param {Integer} end The byte at which to stop reading (end will not be read)
 * @return {Promise<Uint8Array>} A Promise that resolves to the correctly concatenated data from the cache
 *     or from the ZIM archive
 */
var read = function (file, begin, end) {
    // Read large chunks bypassing the block cache because we would have to
    // stitch together too many blocks and would clog the cache
    if (end - begin > BLOCK_SIZE * 2) return file._readSplitSlice(begin, end);
    // Offset of the block that contains the first requested byte
    var firstBlock = Math.floor(begin / BLOCK_SIZE) * BLOCK_SIZE;
    var pendingReads = [];
    var blocks = {};
    // Reads one block from the archive, then records it in the cache and in the local block table
    var fetchBlock = function (offset) {
        return file._readSplitSlice(offset, offset + BLOCK_SIZE).then(function (data) {
            cache.store(offset + '' + file.id, data);
            blocks[offset] = data;
        });
    };
    // Look for the requested data in the blocks: we may need to stitch together data from two or more blocks
    for (var offset = firstBlock; offset < end; offset += BLOCK_SIZE) {
        var cached = cache.get(offset + '' + file.id);
        if (cached === undefined) {
            // Data not in cache, so read from archive
            misses++;
            pendingReads.push(fetchBlock(offset));
        } else {
            hits++;
            blocks[offset] = cached;
        }
    }
    // Periodically report the hit rate and restart the counters
    if (misses + hits > 2000) {
        console.log("** Block cache hit rate: " + Math.round(hits / (hits + misses) * 1000) / 10 + "% [ hits:" + hits + " / misses:" + misses + " ]");
        hits = 0;
        misses = 0;
    }
    // Wait for all the blocks to be read either from the cache or from the archive
    return Q.all(pendingReads).then(function () {
        var result = new Uint8Array(end - begin);
        var pos = 0;
        // Stitch together the data parts in the right order
        for (var i = firstBlock; i < end; i += BLOCK_SIZE) {
            // Portion of this block (relative to the block start) that lies inside the requested range
            var from = Math.max(i, begin) - i;
            var to = Math.min(end, i + BLOCK_SIZE) - i;
            var block = blocks[i];
            // Skip anything that is not a typed array (including a missing block) rather than throwing
            if (block && block.subarray) result.set(block.subarray(from, to), pos);
            pos += to - from;
        }
        return result;
    });
};
|
|
|
|
return {
|
|
read: read,
|
|
init: init
|
|
};
|
|
}); |