Add updates from kiwix-js
Former-commit-id: 25d2700cb2ecc3c22bcd70833ed514fc6a72724d [formerly ff1f960c784b76dc2d2ecd4d7bdbd58524d1f14f [formerly 52dfa73d8c84e3816e2c09c9c03304efff590076]] Former-commit-id: 4fb64b04d17666efb8f42fabf45e413c582dde45 Former-commit-id: 81c61791af26b259ec2510acb9e73dc48ae6d106
parent ab629f71fe
commit c6489080ff
@@ -25,6 +25,16 @@ define(['q', 'zstdec'], function(Q) {
// Note that we include zstdec above in requireJS definition, but we cannot change the name in the function list
// There is no longer any need to load it in index.html
// For explanation of loading method below to avoid conflicts, see https://github.com/emscripten-core/emscripten/blob/master/src/settings.js

/**
* The ZSTD Decoder instance
* @constructor Constructs the zd object representing a ZSTD decoder Emscripten instance
* @property {Integer} _decHandle The decoder stream context object in asm memory (to be re-used for each decoder operation)
* @property {Object} _inBuffer A JS copy of the inBuffer structure to be set in asm memory (malloc)
* @property {Object} _outBuffer A JS copy of the outBuffer structure to be set in asm memory (malloc)
* @property {Integer} _chunkSize The number of compressed bytes to feed to the decompressor in any one read loop
*/
var zd;
ZD().then(function(instance) {
// Instantiate the zd object
@@ -35,6 +45,40 @@ define(['q', 'zstdec'], function(Q) {
// Get a permanent decoder handle (pointer to control structure)
// NB there is no need to change this handle even between ZIM loads: zstddeclib encourages re-using assigned structures
zd._decHandle = zd._ZSTD_createDStream();
// DEV set chunkSize according to memory environment; for systems with plenty of memory,
// zd can provide a max recommended size with zd._chunkSize = zd._ZSTD_DStreamInSize();
zd._chunkSize = 5 * 1024;

// Initialize inBuffer
zd._inBuffer = {
ptr: null, /* pointer to this inBuffer structure in w/asm memory */
src: null, /* void* src < start of input buffer */
size: zd._chunkSize, /* size_t size < size of input buffer */
pos: 0 /* size_t pos; < position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
};
// Reserve w/asm memory for the inBuffer structure (we will populate assigned memory later)
zd._inBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32-bit fields = 12 bytes
// Reserve w/asm memory for the inBuffer data stream
zd._inBuffer.src = mallocOrDie(zd._inBuffer.size);

// DEV: Size of outBuffer is currently set as recommended by zd._ZSTD_DStreamOutSize() below; if you are running into
// memory issues, it may be possible to reduce memory consumption by setting a smaller outBuffer size here and
// recompiling zstdec.js with lower TOTAL_MEMORY (or just search for INITIAL_MEMORY in zstdec.js and change it)
var recOutBufSize = zd._chunkSize * 4;
var maxOutBufSize = zd._ZSTD_DStreamOutSize();
var outBufSize = recOutBufSize > maxOutBufSize ? maxOutBufSize : recOutBufSize;

// Initialize outBuffer
zd._outBuffer = {
ptr: null, /* pointer to this outBuffer structure in asm/wasm memory */
dst: null, /* void* dst < start of output buffer (pointer) */
size: outBufSize, /* size_t size < size of output buffer */
pos: 0 /* size_t pos < position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
};
// Reserve w/asm memory for the outBuffer structure
zd._outBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32-bit fields = 12 bytes
// Reserve w/asm memory for the outBuffer data stream
zd._outBuffer.dst = mallocOrDie(zd._outBuffer.size);
});

/**
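For reference, the JS-side _inBuffer and _outBuffer objects above mirror the C structs ZSTD_inBuffer and ZSTD_outBuffer, which in 32-bit wasm memory occupy three 4-byte fields each (hence mallocOrDie(3 << 2)). A minimal sketch of how such a struct can be written to and read back from the Emscripten heap is shown below; the helper names are illustrative and not part of this commit.

// Write a {src/dst, size, pos} struct into wasm memory at structPtr (3 x 32-bit fields)
function writeBufferStruct(structPtr, dataPtr, size, pos) {
    zd.HEAP32.set(new Int32Array([dataPtr, size, pos]), structPtr >> 2);
}
// Read back the pos field (the third 32-bit slot), as updated by the decoder
function readBufferPos(structPtr) {
    return zd.HEAP32[(structPtr >> 2) + 2];
}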
@@ -51,33 +95,26 @@ define(['q', 'zstdec'], function(Q) {

/**
* @typedef Decompressor
* @property {Integer} _chunkSize The amount to feed to the decompressor in any one read loop
* @property {FileReader} _reader The filereader to use (uses plain blob reader defined in zimfile.js)
* @property {Integer} _inStreamPos The current known position in the stream of compressed bytes
* @property {Integer} _inStreamChunkedPos The position once the currently loaded chunk will have been consumed
* @property {Integer} _outStreamPos The position in the decoded byte stream (offset from start of cluster)
* @property {Array} _outDataBuf The buffer that stores decoded bytes (it is set to the requested blob's lenght, and when full, the data are returned)
* @property {Array} _outDataBuf The buffer that stores decoded bytes (it is set to the requested blob's length, and when full, the data are returned)
* @property {Integer} _outDataBufPos The number of bytes of the requested blob decoded so far
* @property {Object} _inBuffer A JS copy of the inBuffer structure to be set in decompressor memory (malloc)
* @property {Object} _outBuffer A JS copy of the outBuffer structure to be set in decompressor memory (malloc)
*/

/**
* @constructor
* @param {FileReader} reader
* @param {Integer} chunkSize
* @returns {Decompressor}
* @param {FileReader} reader The reader used to extract file slices (defined in zimfile.js)
*/
function Decompressor(reader, chunkSize) {
this._chunkSize = chunkSize || 5 * 1024;
// this._chunkSize = chunkSize || zd._ZSTD_DStreamInSize();
function Decompressor(reader) {
this._reader = reader;
}
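For orientation, a Decompressor is constructed with a reader function that returns a Promise for a slice of compressed cluster data (the real reader is supplied by zimfile.js). A hypothetical usage sketch, in which readClusterSlice, blobOffset and blobLength are placeholder names:

// Hypothetical usage sketch; the real reader function is defined in zimfile.js
var reader = function (offset, size) {
    // Must return a Promise resolving to a typed array of up to `size` compressed bytes
    // read from `offset` within the cluster
    return readClusterSlice(offset, size);
};
var decompressor = new Decompressor(reader);
decompressor.readSliceSingleThread(blobOffset, blobLength).then(function (data) {
    // data is an Int8Array containing the decompressed blob
});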
/**
* Read length bytes, offset into the decompressed stream. Consecutive calls may only
* advance in the stream and may not overlap.
* @param {Integer} offset Offset from which to start reading
* @param {Integer} length Number of bytes to read
* Set up the decompression stream, and initiate a read loop to decompress from the beginning of the cluster
* until we reach <offset> in the decompressed byte stream
* @param {Integer} offset Cluster offset (in decompressed stream) from which to start reading
* @param {Integer} length Number of decompressed bytes to read
* @returns {Promise<ArrayBuffer>} Promise for an ArrayBuffer with decoded data
*/
Decompressor.prototype.readSlice = function(offset, length) {
@@ -87,28 +124,6 @@ define(['q', 'zstdec'], function(Q) {
this._outStreamPos = 0;
this._outDataBuf = new Int8Array(new ArrayBuffer(length));
this._outDataBufPos = 0;

// Initialize inBuffer
this._inBuffer = {
ptr: null, /* pointer to this inBuffer structure in w/asm memory */
src: null, /* void* src < start of input buffer */
size: length, /* size_t size < size of input buffer */
pos: 0 /* size_t pos; < position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
};
// Reserve w/asm memory for the outBuffer structure
this._inBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32bit bytes
// DEV: Size of outBuffer is currently set as recommended by zd._ZSTD_DStreamOutSize() below; if you are running into
// memory issues, it may be possible to reduce memory consumption by setting asmaller outBuffer size here and
// reompiling zstdec.js with lower TOTAL_MEMORY (or just search for INITIAL_MEMORY in zstdec.js and change it)
var recOutbufSize = zd._ZSTD_DStreamOutSize();
// Initialize outBuffer
this._outBuffer = {
ptr: null, /* pointer to this outBuffer structure in asm/wasm memory */
dst: null, /* void* dst < start of output buffer (pointer) */
size: recOutbufSize, /* size_t size < size of output buffer */
pos: 0 /* size_t pos < position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
};
this._outBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32bit bytes
var ret = zd._ZSTD_initDStream(zd._decHandle);
if (zd._ZSTD_isError(ret)) {
return Q.reject('Failed to initialize ZSTD decompression');
@@ -116,27 +131,23 @@ define(['q', 'zstdec'], function(Q) {

var that = this;
return this._readLoop(offset, length).then(function(data) {
// DEV: These structures are a known fixed length and could be assigned once, avoiding the need to free them
// currently they are re-assigned on each blob request; consider changing this if memory usage appears to grow over time
zd._free(that._inBuffer.src);
zd._free(that._inBuffer.ptr);
zd._free(that._outBuffer.dst);
zd._free(that._outBuffer.ptr);
// DEV: Freeing zd._decHandle is not needed, and actually increases memory consumption (crashing zstddeclib)
// The library explicitly encourages re-using assigned structures and handles
// DEV: We are re-using all the allocated w/asm memory, so we do not need to free any of the structures assigned with _malloc
// However, should you need to free assigned structures use, e.g., zd._free(zd._inBuffer.src);
// Additionally, freeing zd._decHandle is not needed, and actually increases memory consumption (crashing zstddeclib)
// Should you need to free the decoder stream handle, use the command below, but be sure to create a new stream control object
// before attempting further decompression
// zd._ZSTD_freeDStream(zd._decHandle);
busy = false;
console.log("Freed all data structures.");
return data;
});
};

/**
* Reads a stream of data from the file offset for the given length of bytes to send to the decompressor
* This function ensures that only one decompression runs at a time
* @param {Integer} offset The file offset at which to begin reading compressed data
* @param {Integer} length The amount of data to read
* @returns {Promise} A Promise for the read data
* This function ensures that only one decompression runs at a time, launching readSlice() only when
* the decompressor is no longer busy
* @param {Integer} offset The cluster offset (in decompressed stream) at which the requested blob resides
* @param {Integer} length The number of decompressed bytes to read
* @returns {Promise} A Promise for the readSlice() function
*/
Decompressor.prototype.readSliceSingleThread = function (offset, length) {
if (!busy) {
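The busy flag gates concurrent use of the single decoder instance; the gating logic itself lies in unchanged context that this hunk does not show. Purely as an illustration of the pattern the JSDoc describes (launch readSlice() only when the decompressor is no longer busy), a sketch might look like this; it is not the actual implementation:

// Illustrative sketch only: defer with Q until the decoder is free, then run readSlice()
Decompressor.prototype.readSliceSingleThread = function (offset, length) {
    var that = this;
    if (!busy) {
        busy = true; // readSlice() resets this to false once it has finished
        return this.readSlice(offset, length);
    }
    return Q.delay(10).then(function () {
        return that.readSliceSingleThread(offset, length);
    });
};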
@@ -156,15 +167,16 @@ define(['q', 'zstdec'], function(Q) {

/**
* The main loop for sending compressed data to the decompressor and retrieving decompressed bytes
* @param {Integer} offset The offset in the *decompressed* byte stream at which the requeste blob resides
* Consecutive calls to readLoop may only advance in the stream and may not overlap
* @param {Integer} offset The offset in the *decompressed* byte stream at which the requested blob resides
* @param {Integer} length The decompressed size of the requested blob
* @param {Integer} dataRequest The recommended number of bytes the decompressor has requested
* @returns {Promise<Int8Array>} A Promise for an Int8Array containing the requested blob's decompressed bytes
*/
Decompressor.prototype._readLoop = function(offset, length) {
Decompressor.prototype._readLoop = function(offset, length, dataRequest) {
var that = this;
return this._fillInBufferIfNeeded(offset, length).then(function() {
var ret = zd._ZSTD_decompressStream(zd._decHandle, that._outBuffer.ptr, that._inBuffer.ptr);
// var ret = zd._ZSTD_decompressStream_simpleArgs(that._decHandle, that._outBuffer.ptr, that._outBuffer.size, 0, that._inBuffer.ptr, that._inBuffer.size, 0);
return this._fillInBufferIfNeeded(offset, length, dataRequest).then(function() {
var ret = zd._ZSTD_decompressStream(zd._decHandle, zd._outBuffer.ptr, zd._inBuffer.ptr);
if (zd._ZSTD_isError(ret)) {
var errorMessage = "Failed to decompress data stream!\n" + zd.getErrorString(ret);
console.error(errorMessage);
@@ -176,17 +188,16 @@ define(['q', 'zstdec'], function(Q) {
finished = true;
} else if (ret > 0) {
// supply more data
that._inBuffer.size = ret;
zd._inBuffer.size = ret;
}

// Get updated inbuffer values for processing on the JS side
// NB the zd.Decoder will read these values from its own buffers
var ibx32ptr = that._inBuffer.ptr >> 2;
that._inBuffer.pos = zd.HEAP32[ibx32ptr + 2];
var ibx32ptr = zd._inBuffer.ptr >> 2;
zd._inBuffer.pos = zd.HEAP32[ibx32ptr + 2];

// Get updated outbuffer values
var obx32ptr = that._outBuffer.ptr >> 2;
// that._outBuffer.size = zd.HEAP32[obx32ptr + 1];
var obx32ptr = zd._outBuffer.ptr >> 2;
var outPos = zd.HEAP32[obx32ptr + 2];

// If data have been decompressed, check to see whether the data are in the offset range we need
@@ -195,69 +206,59 @@ define(['q', 'zstdec'], function(Q) {
console.log('**Copying decompressed bytes**\ncopyStart: ' + copyStart);
if (copyStart < 0) copyStart = 0;
for (var i = copyStart; i < outPos && that._outDataBufPos < that._outDataBuf.length; i++)
that._outDataBuf[that._outDataBufPos++] = zd.HEAP8[that._outBuffer.dst + i];
that._outDataBuf[that._outDataBufPos++] = zd.HEAP8[zd._outBuffer.dst + i];
}
if (that._outDataBufPos === that._outDataBuf.length) finished = true;
// Increment the byte stream positions
that._inStreamPos += that._inBuffer.pos;
that._inStreamPos += zd._inBuffer.pos;
that._outStreamPos += outPos;
// DEV: if outPos is > 0, then we have either copied all data from outBuffer, or we can now throw those data away
// because they are before our required offset
// Se we can now reset the asm outBuffer.pos field to 0
zd.HEAP32[obx32ptr + 2] = 0;
// However, this isn't necessary becasuse zd._outBuffer.pos is always 0, and the buffer will be reset - WILL IT???
// do not change the _outBuffer.size field locally; _outBuffer.size is the maximum amount the ZSTD codec is allowed
// to decode in one go, but even if it is only partially written, we just copy the decoded bytes and reset _ouBuffer.pos to 0

// TESTING (remove before merge)
console.log("Offset: " + offset + "\nLength: " + length + "\ninStreamPos: " + that._inStreamPos + "\noutStreamPos: " + that._outStreamPos);

if (outPos > 0) {
// We have either copied all data from outBuffer, or we can throw those data away because they are before our required offset
// This resets the outbuffer->pos to 0, so we can re-use the outbuffer memory space without re-initializing
// Below is the 'raw' way to do this for info, but the JS copy will be set in fillInBufferIfNeeded()
// zd.HEAP32[obx32ptr + 2] = 0;
that._outBuffer.pos = 0;
}
if (finished) {
console.log("Read loop finished.");
return that._outDataBuf;
} else {
return that._readLoop(offset, length);
return that._readLoop(offset, length, ret);
}
});
};
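For reference, the value returned by zd._ZSTD_decompressStream() is what continues or terminates the recursion above: per the ZSTD streaming API, 0 means the current frame is fully decoded and flushed, a positive value is a hint for how many more compressed bytes the decoder would like (passed back here as dataRequest), and error codes are detected with zd._ZSTD_isError(). A condensed sketch of that convention:

// Condensed sketch of the return-value convention handled in _readLoop() above
var ret = zd._ZSTD_decompressStream(zd._decHandle, zd._outBuffer.ptr, zd._inBuffer.ptr);
if (zd._ZSTD_isError(ret)) {
    throw new Error('Failed to decompress data stream!\n' + zd.getErrorString(ret));
} else if (ret === 0) {
    // End of frame: all compressed data for this cluster has been consumed and flushed
} else {
    // ret > 0: the decoder suggests roughly `ret` more input bytes on the next pass
}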

/**
* Fills in the instream buffer if needed
* @param {Integer} currOffset The current read offset
* @param {Integer} len The decompressed length of data requested
* @param {Integer} req The requested number of compressed bytes (optional)
* @returns {Promise<0>} A Promise for 0 when all data have been added to the stream
*/
Decompressor.prototype._fillInBufferIfNeeded = function(currOffset, len) {
if (this._inStreamPos + len < this._inStreamChunkedPos) {
// We should still have enough data in the buffer (because decompressed len > compressed len)
Decompressor.prototype._fillInBufferIfNeeded = function(req) {
req = req || 0;
if (this._inStreamPos + req < this._inStreamChunkedPos) {
// We should still have enough data in the buffer
// DEV: When converting to Promise/A+, use Promise.resolve(0) here
return Q.when(0);
}
var that = this;
return this._reader(this._inStreamPos, this._chunkSize).then(function(data) {
return this._reader(this._inStreamPos, zd._chunkSize).then(function(data) {
// Populate inBuffer and assign asm/wasm memory if not already assigned
that._inBuffer.size = data.length;
if (!that._inBuffer.src) {
that._inBuffer.src = mallocOrDie(that._inBuffer.size);
}
// Re-use inBuffer
that._inBuffer.pos = 0;
var inBufferStruct = new Int32Array([that._inBuffer.src, that._inBuffer.size, that._inBuffer.pos]);
zd._inBuffer.size = data.length;
// Reset inBuffer
zd._inBuffer.pos = 0;
var inBufferStruct = new Int32Array([zd._inBuffer.src, zd._inBuffer.size, zd._inBuffer.pos]);
// Write inBuffer structure to previously assigned w/asm memory
zd.HEAP32.set(inBufferStruct, that._inBuffer.ptr >> 2);
// Populate outBuffer (but re-use existing if it was already assinged)
// DEV: because we're re-using the allocated memory (malloc), you cannot change the _outBuffer.size field locally
// _outBuffer.size is the maximum amount the ZSTD codec is allowed to decode in one go
// so if we need more data, we just copy those decoded bytes and reset _ouBuffer.pos to 0
if (!that._outBuffer.dst) {
that._outBuffer.dst = mallocOrDie(that._outBuffer.size);
}
var outBufferStruct = new Int32Array([that._outBuffer.dst, that._outBuffer.size, that._outBuffer.pos]);
zd.HEAP32.set(inBufferStruct, zd._inBuffer.ptr >> 2);
var outBufferStruct = new Int32Array([zd._outBuffer.dst, zd._outBuffer.size, zd._outBuffer.pos]);
// Write outBuffer structure to w/asm memory
zd.HEAP32.set(outBufferStruct, that._outBuffer.ptr >> 2);
zd.HEAP32.set(outBufferStruct, zd._outBuffer.ptr >> 2);

// Transfer the (new) data to be read to the inBuffer
zd.HEAP8.set(data, that._inBuffer.src);
zd.HEAPU8.set(data, zd._inBuffer.src);
that._inStreamChunkedPos += data.length;
return 0;
});
@@ -265,8 +266,9 @@ define(['q', 'zstdec'], function(Q) {

/**
* Provision asm/wasm data block and get a pointer to the assigned location
* @param {Number} sizeOfData The number of bytes to be allocated
* @returns {Number} Pointer to the assigned data block
* Code used from the excellent WASM tutorial here: https://marcoselvatici.github.io/WASM_tutorial/
* @param {Integer} sizeOfData The number of bytes to be allocated
* @returns {Integer} Pointer to the assigned data block
*/
function mallocOrDie(sizeOfData) {
const dataPointer = zd._malloc(sizeOfData);
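The remainder of mallocOrDie() is cut off at this point in the diff view. Following the null-pointer check pattern from the tutorial linked above, the complete function would plausibly look like the sketch below, though the committed body may differ:

// Sketch of a typical mallocOrDie(), assuming the usual null-pointer check
function mallocOrDie(sizeOfData) {
    const dataPointer = zd._malloc(sizeOfData);
    if (dataPointer === 0) {
        // _malloc() returns 0 (a null pointer) if the wasm heap cannot satisfy the request
        throw new Error('Unable to allocate ' + sizeOfData + ' bytes of asm/wasm memory!');
    }
    return dataPointer;
}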