Add updates from kiwix-js

Former-commit-id: 25d2700cb2ecc3c22bcd70833ed514fc6a72724d [formerly ff1f960c784b76dc2d2ecd4d7bdbd58524d1f14f [formerly 52dfa73d8c84e3816e2c09c9c03304efff590076]]
Former-commit-id: 4fb64b04d17666efb8f42fabf45e413c582dde45
Former-commit-id: 81c61791af26b259ec2510acb9e73dc48ae6d106
This commit is contained in:
Jaifroid 2020-09-03 08:05:28 +01:00
parent ab629f71fe
commit c6489080ff

View File

@ -25,6 +25,16 @@ define(['q', 'zstdec'], function(Q) {
// Note that we include zstdec above in requireJS definition, but we cannot change the name in the function list
// There is no longer any need to load it in index.html
// For explanation of loading method below to avoid conflicts, see https://github.com/emscripten-core/emscripten/blob/master/src/settings.js
/**
* The ZSTD Decoder instance
* @constructor Constructs the zd object representing a ZSTD decoder Emscripten instance
* @property {Integer} _decHandle The decoder stream context object in asm memory (to be re-used for each decoder operation)
* @property {Object} _inBuffer A JS copy of the inBuffer structure to be set in asm memory (malloc)
* @property {Object} _outBuffer A JS copy of the outBuffer structure to be set in asm memory (malloc)
* @property {Integer} _chunkSize The number of compressed bytes to feed to the decompressor in any one read loop
*/
var zd;
ZD().then(function(instance) {
// Instantiate the zd object
@ -35,6 +45,40 @@ define(['q', 'zstdec'], function(Q) {
// Get a permanent decoder handle (pointer to control structure)
// NB there is no need to change this handle even between ZIM loads: zstddeclib encourages re-using assigned structures
zd._decHandle = zd._ZSTD_createDStream();
// DEV set chunkSize according to memory environment; for systems with plenty of memory,
// zd can provide a max recommended size with zd._chunkSize = zd._ZSTD_DStreamInSize();
zd._chunkSize = 5 * 1024;
// Initialize inBuffer
zd._inBuffer = {
ptr: null, /* pointer to this inBuffer structure in w/asm memory */
src: null, /* void* src < start of input buffer */
size: zd._chunkSize, /* size_t size < size of input buffer */
pos: 0 /* size_t pos; < position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
};
// Reserve w/asm memory for the inBuffer structure (we will populate assigned memory later)
zd._inBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32bit bytes
// Reserve w/asm memory for the inBuffer data stream
zd._inBuffer.src = mallocOrDie(zd._inBuffer.size);
// DEV: Size of outBuffer is currently set as recommended by zd._ZSTD_DStreamOutSize() below; if you are running into
// memory issues, it may be possible to reduce memory consumption by setting a smaller outBuffer size here and
// reompiling zstdec.js with lower TOTAL_MEMORY (or just search for INITIAL_MEMORY in zstdec.js and change it)
var recOutBufSize = zd._chunkSize * 4;
var maxOutBufSize = zd._ZSTD_DStreamOutSize();
var outBufSize = recOutBufSize > maxOutBufSize ? maxOutBufSize : recOutBufSize;
// Initialize outBuffer
zd._outBuffer = {
ptr: null, /* pointer to this outBuffer structure in asm/wasm memory */
dst: null, /* void* dst < start of output buffer (pointer) */
size: outBufSize, /* size_t size < size of output buffer */
pos: 0 /* size_t pos < position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
};
// Reserve w/asm memory for the outBuffer structure
zd._outBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32bit bytes
// Reserve w/asm memory for the outBuffer data steam
zd._outBuffer.dst = mallocOrDie(zd._outBuffer.size);
});
/**
@ -51,33 +95,26 @@ define(['q', 'zstdec'], function(Q) {
/**
* @typedef Decompressor
* @property {Integer} _chunkSize The amount to feed to the decompressor in any one read loop
* @property {FileReader} _reader The filereader to use (uses plain blob reader defined in zimfile.js)
* @property {Integer} _inStreamPos The current known position in the steam of compressed bytes
* @property {Integer} _inStreamChunkedPos The position once the currently loaded chunk will have been consumed
* @property {Integer} _outStreamPos The position in the decoded byte stream (offset from start of cluster)
* @property {Array} _outDataBuf The buffer that stores decoded bytes (it is set to the requested blob's lenght, and when full, the data are returned)
* @property {Array} _outDataBuf The buffer that stores decoded bytes (it is set to the requested blob's length, and when full, the data are returned)
* @property {Integer} _outDataBufPos The number of bytes of the requested blob decoded so far
* @property {Object} _inBuffer A JS copy of the inBuffer structure to be set in decompressor memory (malloc)
* @property {Object} _outBuffer A JS copy of the outBuffer structure to be set in decompressor memory (malloc)
*/
/**
* @constructor
* @param {FileReader} reader
* @param {Integer} chunkSize
* @returns {Decompressor}
* @param {FileReader} reader The reader used to extract file slices (defined in zimfile.js)
*/
function Decompressor(reader, chunkSize) {
this._chunkSize = chunkSize || 5 * 1024;
// this._chunkSize = chunkSize || zd._ZSTD_DStreamInSize();
function Decompressor(reader) {
this._reader = reader;
}
/**
* Read length bytes, offset into the decompressed stream. Consecutive calls may only
* advance in the stream and may not overlap.
* @param {Integer} offset Offset from which to start reading
* @param {Integer} length Number of bytes to read
* Set up the decompression stream, and initiate a read loop to decompress from the beginning of the cluster
* until we reach <offset> in the decompressed byte stream
* @param {Integer} offset Cluster offset (in deocmpressed stream) from which to start reading
* @param {Integer} length Number of decompressed bytes to read
* @returns {Promise<ArrayBuffer>} Promise for an ArrayBuffer with decoded data
*/
Decompressor.prototype.readSlice = function(offset, length) {
@ -87,28 +124,6 @@ define(['q', 'zstdec'], function(Q) {
this._outStreamPos = 0;
this._outDataBuf = new Int8Array(new ArrayBuffer(length));
this._outDataBufPos = 0;
// Initialize inBuffer
this._inBuffer = {
ptr: null, /* pointer to this inBuffer structure in w/asm memory */
src: null, /* void* src < start of input buffer */
size: length, /* size_t size < size of input buffer */
pos: 0 /* size_t pos; < position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
};
// Reserve w/asm memory for the outBuffer structure
this._inBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32bit bytes
// DEV: Size of outBuffer is currently set as recommended by zd._ZSTD_DStreamOutSize() below; if you are running into
// memory issues, it may be possible to reduce memory consumption by setting asmaller outBuffer size here and
// reompiling zstdec.js with lower TOTAL_MEMORY (or just search for INITIAL_MEMORY in zstdec.js and change it)
var recOutbufSize = zd._ZSTD_DStreamOutSize();
// Initialize outBuffer
this._outBuffer = {
ptr: null, /* pointer to this outBuffer structure in asm/wasm memory */
dst: null, /* void* dst < start of output buffer (pointer) */
size: recOutbufSize, /* size_t size < size of output buffer */
pos: 0 /* size_t pos < position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
};
this._outBuffer.ptr = mallocOrDie(3 << 2); // 3 x 32bit bytes
var ret = zd._ZSTD_initDStream(zd._decHandle);
if (zd._ZSTD_isError(ret)) {
return Q.reject('Failed to initialize ZSTD decompression');
@ -116,27 +131,23 @@ define(['q', 'zstdec'], function(Q) {
var that = this;
return this._readLoop(offset, length).then(function(data) {
// DEV: These structures are a known fixed length and could be assigned once, avoiding the need to free them
// currently they are re-assigned on each blob request; consider changing this if memory usage appears to grow over time
zd._free(that._inBuffer.src);
zd._free(that._inBuffer.ptr);
zd._free(that._outBuffer.dst);
zd._free(that._outBuffer.ptr);
// DEV: Freeing zd._decHandle is not needed, and actually increases memory consumption (crashing zstddeclib)
// The library explicitly encourages re-using assigned structures and handles
// DEV: We are re-using all the allocated w/asm memory, so we do not need to free any of structures assigned wiht _malloc
// However, should you need to free assigned structures use, e.g., zd._free(zd._inBuffer.src);
// Additionally, freeing zd._decHandle is not needed, and actually increases memory consumption (crashing zstddeclib)
// Should you need to free the decoder stream handle, use command below, but be sure to create a new stream control object
// before attempting further decompression
// zd._ZSTD_freeDStream(zd._decHandle);
busy = false;
console.log("Freed all data structures.");
return data;
});
};
/**
* Reads stream of data from file offset for length of bytes to send to the decompresor
* This function ensures that only one decompression runs at a time
* @param {Integer} offset The file offset at which to begin reading compressed data
* @param {Integer} length The amount of data to read
* @returns {Promise} A Promise for the read data
* This function ensures that only one decompression runs at a time, launching readSlice() only when
* the decompressor is no longer busy
* @param {Integer} offset The cluster offset (in decompressed stream) at which the requested blob resides
* @param {Integer} length The number of decompressed bytes to read
* @returns {Promise} A Promise for the readSlice() function
*/
Decompressor.prototype.readSliceSingleThread = function (offset, length) {
if (!busy) {
@ -156,15 +167,16 @@ define(['q', 'zstdec'], function(Q) {
/**
* The main loop for sending compressed data to the decompressor and retrieving decompressed bytes
* @param {Integer} offset The offset in the *decompressed* byte stream at which the requeste blob resides
* Consecutive calls to readLoop may only advance in the stream and may not overlap
* @param {Integer} offset The offset in the *decompressed* byte stream at which the requested blob resides
* @param {Integer} length The deomcpressed size of the requested blob
* @param {Integer} dataRequest The recommended number of bytes the docompressor has requested
* @returns {Promise<Int8Array>} A Promise for an Int8Array containing the requested blob's decompressed bytes
*/
Decompressor.prototype._readLoop = function(offset, length) {
Decompressor.prototype._readLoop = function(offset, length, dataRequest) {
var that = this;
return this._fillInBufferIfNeeded(offset, length).then(function() {
var ret = zd._ZSTD_decompressStream(zd._decHandle, that._outBuffer.ptr, that._inBuffer.ptr);
// var ret = zd._ZSTD_decompressStream_simpleArgs(that._decHandle, that._outBuffer.ptr, that._outBuffer.size, 0, that._inBuffer.ptr, that._inBuffer.size, 0);
return this._fillInBufferIfNeeded(offset, length, dataRequest).then(function() {
var ret = zd._ZSTD_decompressStream(zd._decHandle, zd._outBuffer.ptr, zd._inBuffer.ptr);
if (zd._ZSTD_isError(ret)) {
var errorMessage = "Failed to decompress data stream!\n" + zd.getErrorString(ret);
console.error(errorMessage);
@ -176,17 +188,16 @@ define(['q', 'zstdec'], function(Q) {
finished = true;
} else if (ret > 0) {
// supply more data
that._inBuffer.size = ret;
zd._inBuffer.size = ret;
}
// Get updated inbuffer values for processing on the JS sice
// NB the zd.Decoder will read these values from its own buffers
var ibx32ptr = that._inBuffer.ptr >> 2;
that._inBuffer.pos = zd.HEAP32[ibx32ptr + 2];
var ibx32ptr = zd._inBuffer.ptr >> 2;
zd._inBuffer.pos = zd.HEAP32[ibx32ptr + 2];
// Get updated outbuffer values
var obx32ptr = that._outBuffer.ptr >> 2;
// that._outBuffer.size = zd.HEAP32[obx32ptr + 1];
var obx32ptr = zd._outBuffer.ptr >> 2;
var outPos = zd.HEAP32[obx32ptr + 2];
// If data have been decompressed, check to see whether the data are in the offset range we need
@ -195,69 +206,59 @@ define(['q', 'zstdec'], function(Q) {
console.log('**Copying decompressed bytes**\ncopyStart: ' + copyStart);
if (copyStart < 0) copyStart = 0;
for (var i = copyStart; i < outPos && that._outDataBufPos < that._outDataBuf.length; i++)
that._outDataBuf[that._outDataBufPos++] = zd.HEAP8[that._outBuffer.dst + i];
that._outDataBuf[that._outDataBufPos++] = zd.HEAP8[zd._outBuffer.dst + i];
}
if (that._outDataBufPos === that._outDataBuf.length) finished = true;
// Increment the byte stream positions
that._inStreamPos += that._inBuffer.pos;
that._inStreamPos += zd._inBuffer.pos;
that._outStreamPos += outPos;
// DEV: if outPos is > 0, then we have either copied all data from outBuffer, or we can now throw those data away
// because they are before our required offset
// Se we can now reset the asm outBuffer.pos field to 0
zd.HEAP32[obx32ptr + 2] = 0;
// However, this isn't necessary becasuse zd._outBuffer.pos is always 0, and the buffer will be reset - WILL IT???
// do not change the _outBuffer.size field locally; _outBuffer.size is the maximum amount the ZSTD codec is allowed
// to decode in one go, but even if it is only partially written, we just copy the decoded bytes and reset _ouBuffer.pos to 0
// TESTING (remove before merge)
console.log("Offset: " + offset + "\nLength: " + length + "\ninStreamPos: " + that._inStreamPos + "\noutStreamPos: " + that._outStreamPos);
if (outPos > 0) {
// We have either copied all data from outBuffer, or we can throw those data away because they are before our required offset
// This resets the outbuffer->ptr to 0, so we can re-use the outbuffer memory space without re-initializing
// Below is the 'raw' way to do this for info, but the JS copy will be set in fillInBufferIfNeeded()
// zd.HEAP32[obx32ptr + 2] = 0;
that._outBuffer.pos = 0;
}
if (finished) {
console.log("Read loop finished.");
return that._outDataBuf;
} else {
return that._readLoop(offset, length);
return that._readLoop(offset, length, ret);
}
});
};
/**
* Fills in the instream buffer if needed
* @param {Integer} currOffset The current read offset
* @param {Integer} len The decompressed length of data requested
* @param {Integer} req The requested number of compressed bytes (optional)
* @returns {Promise<0>} A Promise for 0 when all data have been added to the stream
*/
Decompressor.prototype._fillInBufferIfNeeded = function(currOffset, len) {
if (this._inStreamPos + len < this._inStreamChunkedPos) {
// We should still have enough data in the buffer (because decompressed len > compressed len)
Decompressor.prototype._fillInBufferIfNeeded = function(req) {
req = req || 0;
if (this._inStreamPos + req < this._inStreamChunkedPos) {
// We should still have enough data in the buffer
// DEV: When converting to Promise/A+, use Promise.resolve(0) here
return Q.when(0);
}
var that = this;
return this._reader(this._inStreamPos, this._chunkSize).then(function(data) {
return this._reader(this._inStreamPos, zd._chunkSize).then(function(data) {
// Populate inBuffer and assign asm/wasm memory if not already assigned
that._inBuffer.size = data.length;
if (!that._inBuffer.src) {
that._inBuffer.src = mallocOrDie(that._inBuffer.size);
}
// Re-use inBuffer
that._inBuffer.pos = 0;
var inBufferStruct = new Int32Array([that._inBuffer.src, that._inBuffer.size, that._inBuffer.pos]);
zd._inBuffer.size = data.length;
// Reset inBuffer
zd._inBuffer.pos = 0;
var inBufferStruct = new Int32Array([zd._inBuffer.src, zd._inBuffer.size, zd._inBuffer.pos]);
// Write inBuffer structure to previously assigned w/asm memory
zd.HEAP32.set(inBufferStruct, that._inBuffer.ptr >> 2);
// Populate outBuffer (but re-use existing if it was already assinged)
// DEV: because we're re-using the allocated memory (malloc), you cannot change the _outBuffer.size field locally
// _outBuffer.size is the maximum amount the ZSTD codec is allowed to decode in one go
// so if we need more data, we just copy those decoded bytes and reset _ouBuffer.pos to 0
if (!that._outBuffer.dst) {
that._outBuffer.dst = mallocOrDie(that._outBuffer.size);
}
var outBufferStruct = new Int32Array([that._outBuffer.dst, that._outBuffer.size, that._outBuffer.pos]);
zd.HEAP32.set(inBufferStruct, zd._inBuffer.ptr >> 2);
var outBufferStruct = new Int32Array([zd._outBuffer.dst, zd._outBuffer.size, zd._outBuffer.pos]);
// Write outBuffer structure to w/asm memory
zd.HEAP32.set(outBufferStruct, that._outBuffer.ptr >> 2);
zd.HEAP32.set(outBufferStruct, zd._outBuffer.ptr >> 2);
// Transfer the (new) data to be read to the inBuffer
zd.HEAP8.set(data, that._inBuffer.src);
zd.HEAPU8.set(data, zd._inBuffer.src);
that._inStreamChunkedPos += data.length;
return 0;
});
@ -265,8 +266,9 @@ define(['q', 'zstdec'], function(Q) {
/**
* Provision asm/wasm data block and get a pointer to the assigned location
* @param {Number} sizeOfData The number of bytes to be allocated
* @returns {Number} Pointer to the assigned data block
* Code used from excellent WASM tutorial here: https://marcoselvatici.github.io/WASM_tutorial/
* @param {Integer} sizeOfData The number of bytes to be allocated
* @returns {Integer} Pointer to the assigned data block
*/
function mallocOrDie(sizeOfData) {
const dataPointer = zd._malloc(sizeOfData);