Add Replay system capability for full-spec Zimit replay (#500)

This commit is contained in:
Jaifroid 2023-12-11 18:36:16 +00:00 committed by GitHub
parent 41bca5f437
commit cb2e6bb429
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 61530 additions and 292 deletions

View File

@ -55,6 +55,10 @@ function createWindow () {
preload: path.join(__dirname, 'preload.cjs'), preload: path.join(__dirname, 'preload.cjs'),
nativeWindowOpen: true, nativeWindowOpen: true,
nodeIntegrationInWorker: true nodeIntegrationInWorker: true
// nodeIntegration: false,
// contextIsolation: true,
// enableRemoteModule: false,
// sandbox: true
} }
}); });
@ -62,6 +66,7 @@ function createWindow () {
// mainWindow.webContents.openDevTools(); // mainWindow.webContents.openDevTools();
mainWindow.loadFile('www/index.html'); mainWindow.loadFile('www/index.html');
// mainWindow.loadURL('https://pwa.kiwix.org/www/index.html');
} }
function registerListeners () { function registerListeners () {

60349
replayWorker.js Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,11 +1,12 @@
/* eslint-disable object-property-newline, object-curly-newline */
// rollup.config.js // rollup.config.js
import resolve from '@rollup/plugin-node-resolve'; import resolve from '@rollup/plugin-node-resolve';
import babel from '@rollup/plugin-babel'; import babel from '@rollup/plugin-babel';
import { transformAsync } from '@babel/core';
import commonjs from '@rollup/plugin-commonjs'; import commonjs from '@rollup/plugin-commonjs';
import replace from '@rollup/plugin-replace'; import replace from '@rollup/plugin-replace';
import copy from 'rollup-plugin-copy'; import copy from 'rollup-plugin-copy';
import terser from '@rollup/plugin-terser'; import terser from '@rollup/plugin-terser';
import { minify } from 'terser';
// import styles from "@ironkinoko/rollup-plugin-styles"; // import styles from "@ironkinoko/rollup-plugin-styles";
const config = { const config = {
@ -65,18 +66,70 @@ if (process.env.BUILD === 'production') {
onlyFiles: true onlyFiles: true
}, },
{ {
src: ['service-worker.js', 'KiwixWebApp*.jsproj'], src: ['KiwixWebApp*.jsproj'],
dest: 'dist',
transform: (contents, filename) => contents.toString()
// Replace the entry point with the bundle
.replace(/(www[\\/]js[\\/])app.js/, '$1bundle.min.js')
// Remove all the lib files that will be included in the bundle
.replace(/(?:<Content Include=)?['"]www[\\/]js[\\/]lib[\\/]cache[\s\S]+zimfile.js['"](?:\s*\/>|,)\s*/, '')
// Alter remaining lib references
.replace(/([\\/])js[\\/]lib/g, '$1js')
// Remove unneeded ASM/WASM binaries
.replace(/["']www[\\/]js[\\/].*dec.*js["'],\s*/g, '')
},
{
src: ['service-worker.js'],
dest: 'dist', dest: 'dist',
// Modify the Service Worker precache files // Modify the Service Worker precache files
transform: (contents, filename) => contents.toString() transform: async (contents, filename) => {
// Replace the entry point with the bundle const result = await minify(
.replace(/(www[\\/]js[\\/])app.js/, '$1bundle.min.js') contents.toString()
// Remove all the lib files that will be included in the bundle // Replace the entry point with the bundle
.replace(/(?:<Content Include=)?['"]www[\\/]js[\\/]lib[\\/]cache[\s\S]+zimfile.js['"](?:\s*\/>|,)\s*/, '') .replace(/(www[\\/]js[\\/])app.js/, '$1bundle.min.js')
// Alter remaining lib references // Remove all the lib files that will be included in the bundle
.replace(/([\\/])js[\\/]lib/g, '$1js') .replace(/(?:<Content Include=)?["']www[\\/]js[\\/]lib[\\/]cache[\s\S]+zimfile.js["'](?:\s*\/>|,)\s*/, '')
// Remove unneeded ASM/WASM binaries // Replace any references to node_modules
.replace(/['"]www[\\/]js[\\/].*dec.*js['"],\s*/g, '') .replace(/node_modules\/.*dist\/((?:js|css)\/)?/g, function (m, p1) {
p1 = p1 || 'js/';
return 'www/' + p1;
})
// Alter remaining lib references
.replace(/([\\/])js[\\/]lib/g, '$1js')
// Remove unneeded ASM/WASM binaries
.replace(/["']www[\\/]js[\\/].*dec.*js["'],\s*/g, '')
);
return result.code;
}
},
{
src: ['replayWorker.js'],
dest: 'dist',
transform: async (contents, filename) => {
const code = contents.toString();
// Now minify the replayWorker
const minified = await minify(code);
// How to transform with babel (tested to make a difference with Firefox 68+)
const result = await transformAsync(minified.code, {
filename,
presets: [
[
'@babel/preset-env',
{
targets: {
edge: '18',
firefox: '60',
chrome: '67',
safari: '11.1'
},
modules: false,
spec: true
}
]
]
});
return result.code;
}
}, },
{ {
src: 'www/index.html', src: 'www/index.html',
@ -117,7 +170,7 @@ if (process.env.BUILD === 'production') {
// Replace the entry point with the bundle // Replace the entry point with the bundle
.replace(/(www[\\/]js[\\/])app.js/, '$1bundle.js') .replace(/(www[\\/]js[\\/])app.js/, '$1bundle.js')
// Remove all the lib files that will be included in the bundle // Remove all the lib files that will be included in the bundle
.replace(/(?:<Content Include=)?['"]www[\\/]js[\\/]lib[\\/]cache[\s\S]+zimfile.js['"](?:\s*\/>|,)\s*/, '') .replace(/(?:<Content Include=)?["']www[\\/]js[\\/]lib[\\/]cache[\s\S]+zimfile.js["'](?:\s*\/>|,)\s*/, '')
// Replace any references to node_modules // Replace any references to node_modules
.replace(/node_modules[\\/].*dist[\\/]((?:js|css)[\\/])?/g, function (m, p1) { .replace(/node_modules[\\/].*dist[\\/]((?:js|css)[\\/])?/g, function (m, p1) {
p1 = p1 || 'js/'; p1 = p1 || 'js/';
@ -128,6 +181,10 @@ if (process.env.BUILD === 'production') {
// Remove unneeded ASM/WASM binaries // Remove unneeded ASM/WASM binaries
.replace(/['"]www[\\/]js[\\/].*dec.*js['"],\s*/g, '') .replace(/['"]www[\\/]js[\\/].*dec.*js['"],\s*/g, '')
}, },
{
src: ['replayWorker.js'],
dest: 'dist'
},
{ {
src: 'www/index.html', src: 'www/index.html',
dest: 'dist/www', dest: 'dist/www',

View File

@ -25,6 +25,8 @@
/* global chrome */ /* global chrome */
/* eslint-disable prefer-const */
/** /**
* App version number - ENSURE IT MATCHES VALUE IN init.js * App version number - ENSURE IT MATCHES VALUE IN init.js
* DEV: Changing this will cause the browser to recognize that the Service Worker has changed, and it will * DEV: Changing this will cause the browser to recognize that the Service Worker has changed, and it will
@ -68,12 +70,17 @@ var useAppCache = true;
* A Boolean that governs whether images are displayed * A Boolean that governs whether images are displayed
* app.js can alter this variable via messaging * app.js can alter this variable via messaging
*/ */
let imageDisplay; let imageDisplay = 'all';
// Kiwix ZIM Archive Download Server and release update server in regex form // Kiwix ZIM Archive Download Server and release update server in regex form
// DEV: The server URL is defined in init.js, but is not available to us in SW // DEV: The server URL is defined in init.js, but is not available to us in SW
const regexpKiwixDownloadLinks = /download\.kiwix\.org|api\.github\.com/i; const regexpKiwixDownloadLinks = /download\.kiwix\.org|api\.github\.com/i;
/**
* A global Boolean that records whether the ReplayWorker is available
*/
let isReplayWorkerAvailable = false;
/** /**
* A regular expression that matches the Content-Types of assets that may be stored in ASSETS_CACHE * A regular expression that matches the Content-Types of assets that may be stored in ASSETS_CACHE
* Add any further Content-Types you wish to cache to the regexp, separated by '|' * Add any further Content-Types you wish to cache to the regexp, separated by '|'
@ -113,6 +120,7 @@ const regexpByteRangeHeader = /^\s*bytes=(\d+)-/;
const precacheFiles = [ const precacheFiles = [
'.', // This caches the redirect to www/index.html, in case a user launches the app from its root directory '.', // This caches the redirect to www/index.html, in case a user launches the app from its root directory
'manifest.json', 'manifest.json',
'replayWorker.js',
'service-worker.js', 'service-worker.js',
'www/favicon.ico', 'www/favicon.ico',
'www/-/mw/ext.cite.styles.css', 'www/-/mw/ext.cite.styles.css',
@ -177,6 +185,7 @@ const precacheFiles = [
'www/img/spinner.gif', 'www/img/spinner.gif',
'www/index.html', 'www/index.html',
'www/article.html', 'www/article.html',
'www/topFrame.html',
'www/js/app.js', 'www/js/app.js',
'www/js/init.js', 'www/js/init.js',
'www/js/lib/cache.js', 'www/js/lib/cache.js',
@ -294,6 +303,40 @@ self.addEventListener('activate', function (event) {
); );
}); });
// The ReplayWorker bundle is optional. If importing it throws, we record that it is
// unavailable and carry on, so that the rest of the Service Worker still functions.
try {
    self.importScripts('./replayWorker.js');
    isReplayWorkerAvailable = true;
    console.log('[SW] ReplayWorker is available');
} catch (err) {
    console.warn('[SW] ReplayWorker is NOT available', err);
    isReplayWorkerAvailable = false;
}

/**
 * A Promise that settles once the ReplayWorker has been asked to reload all of its collections and the
 * root configuration has been re-applied. It stays undefined if the ReplayWorker could not be imported.
 * This reload is necessary after the SW has stopped and restarted.
 */
let replayCollectionsReloaded;
if (isReplayWorkerAvailable) {
    replayCollectionsReloaded = self.sw.collections.listAll().then(function (colls) {
        if (colls) {
            console.debug('[SW] Reloading ReplayWorker collections', colls);
            return Promise.all(colls.map(function (coll) {
                // console.debug('[SW] Reloading ReplayWorker collection ' + coll.name);
                return self.sw.collections.reload(coll.name);
            })).then(function () {
                // Adjust the root configuration
                if (self.sw.collections.root) {
                    console.debug('[SW] Adjusting ReplayWorker root configuration to ' + self.sw.collections.root);
                    return setReplayCollectionAsRoot(self.sw.collections.colls[self.sw.collections.root].config.sourceUrl, self.sw.collections.root);
                }
            });
        } else {
            console.debug('[SW] No ReplayWorker collections to reload');
        }
    }).catch(function (err) {
        // A failed reload must not leave this Promise rejected, because code that waits on it
        // would then fail every time. Log the problem and let waiting code continue.
        console.error('[SW] Failed to reload ReplayWorker collections', err);
    });
}
// For PWA functionality, this should be true unless explicitly disabled, and in fact currently it is never disabled // For PWA functionality, this should be true unless explicitly disabled, and in fact currently it is never disabled
let fetchCaptureEnabled = true; let fetchCaptureEnabled = true;
@ -301,18 +344,27 @@ let fetchCaptureEnabled = true;
* Intercept selected Fetch requests from the browser window * Intercept selected Fetch requests from the browser window
*/ */
self.addEventListener('fetch', function (event) { self.addEventListener('fetch', function (event) {
// console.debug('[SW] Fetch Event processing', event.request.url);
// Only handle GET or POST requests (POST is intended to handle video in Zimit ZIMs) // Only handle GET or POST requests (POST is intended to handle video in Zimit ZIMs)
if (!/GET|POST/.test(event.request.method)) return; if (!/GET|POST/.test(event.request.method)) return;
var rqUrl = event.request.url; var rqUrl = event.request.url;
// Filter out requests that do not match the scope of the Service Worker // Filter out requests that do not match the scope of the Service Worker
if (/\/dist\/(www|[^/]+?\.zim)\//.test(rqUrl) && !/\/dist\//.test(self.registration.scope)) return; if (/\/dist\/(www|[^/]+?\.zim)\//.test(rqUrl) && !/\/dist\//.test(self.registration.scope)) return;
// Filter darkReader request transformed by wombat.js
if (/\.zim.*\/www\/js\/(?:lib\/)?darkreader\.min\.js/.test(rqUrl)) {
rqUrl = rqUrl.replace(/^([^:]+:\/\/[^/]+)(?:[^/]|\/(?!js_\/))+\/js_\/[^:]+:\/\/[^/]+(.+)/, '$1$2');
}
var urlObject = new URL(rqUrl); var urlObject = new URL(rqUrl);
// Test the URL with parameters removed // Test the URL with parameters removed
var strippedUrl = urlObject.pathname; var strippedUrl = urlObject.pathname;
// Test if we're in an Electron app // Test if we're in an Electron app
// DEV: Electron uses the file:// protocol and hacks it to work with SW, but it has CORS issues when using the Fetch API to fetch local files, // DEV: Electron uses the file:// protocol and hacks it to work with SW, but it has CORS issues when using the Fetch API to fetch local files,
// so we must bypass it here if we're fetching a local file // so we must bypass it here if we're fetching a local file
if (/^file:/i.test(rqUrl) && !(regexpZIMUrlWithNamespace.test(strippedUrl) && /\.zim\//i.test(strippedUrl))) return; if (/^file:/i.test(rqUrl)) {
// For now the Replay Worker doesn't work with the file:// protocol
isReplayWorkerAvailable = false;
if (!(regexpZIMUrlWithNamespace.test(strippedUrl) && /\.zim\//i.test(strippedUrl))) return;
}
// Don't cache download links // Don't cache download links
if (regexpKiwixDownloadLinks.test(rqUrl)) return; if (regexpKiwixDownloadLinks.test(rqUrl)) return;
// Select cache depending on request format // Select cache depending on request format
@ -320,7 +372,6 @@ self.addEventListener('fetch', function (event) {
if (cache === ASSETS_CACHE && !fetchCaptureEnabled) return; if (cache === ASSETS_CACHE && !fetchCaptureEnabled) return;
// For APP_CACHE assets, we should ignore any querystring (whereas it should be conserved for ZIM assets, // For APP_CACHE assets, we should ignore any querystring (whereas it should be conserved for ZIM assets,
// especially .js assets, where it may be significant). Anchor targets are irrelevant in this context. // especially .js assets, where it may be significant). Anchor targets are irrelevant in this context.
// @TODO DEV: This isn't true for Zimit ZIM types! So we will have to send the zimType from app.js
if (cache === APP_CACHE) rqUrl = strippedUrl; if (cache === APP_CACHE) rqUrl = strippedUrl;
return event.respondWith( return event.respondWith(
// First see if the content is in the cache // First see if the content is in the cache
@ -330,50 +381,60 @@ self.addEventListener('fetch', function (event) {
}, function () { }, function () {
// The response was not found in the cache so we look for it in the ZIM // The response was not found in the cache so we look for it in the ZIM
// and add it to the cache if it is an asset type (css or js) // and add it to the cache if it is an asset type (css or js)
// YouTube links from Zimit archives are dealt with specially return zimitResolver(event).then(function (modRequestOrResponse) {
if (/youtubei.*player/.test(strippedUrl) || cache === ASSETS_CACHE && regexpZIMUrlWithNamespace.test(strippedUrl)) { if (modRequestOrResponse instanceof Response) {
const range = event.request.headers.get('range'); // The request was modified by the ReplayWorker and it returned a modified response, so we return it
if (imageDisplay !== 'all' && /\/.*\.(jpe?g|png|svg|gif|webp)(?=.*?kiwix-display)/i.test(rqUrl)) { // console.debug('[SW] Returning modified response from ReplayWorker', modRequest);
// If the user has disabled the display of images, and the browser wants an image, respond with empty SVG return cacheAndReturnResponseForAsset(event, modRequestOrResponse);
// A URL without "?kiwix-display" query string acts as a passthrough so that the regex will not match and }
// the image will be fetched by app.js rqUrl = modRequestOrResponse.url;
// DEV: If you need to hide more image types, add them to regex below and also edit equivalent regex in app.js urlObject = new URL(rqUrl);
var svgResponse; strippedUrl = urlObject.pathname;
if (imageDisplay === 'manual') { // YouTube links from Zimit archives are dealt with specially (for ZIMs not being read by the ReplayWorker)
svgResponse = "<svg xmlns='http://www.w3.org/2000/svg' width='1' height='1'><rect width='1' height='1' style='fill:lightblue'/></svg>"; if (/youtubei.*player/.test(strippedUrl) || cache === ASSETS_CACHE && regexpZIMUrlWithNamespace.test(strippedUrl)) {
} else { if (imageDisplay !== 'all' && /\/.*\.(jpe?g|png|svg|gif|webp)(?=.*?kiwix-display)/i.test(rqUrl)) {
svgResponse = "<svg xmlns='http://www.w3.org/2000/svg'/>"; // If the user has disabled the display of images, and the browser wants an image, respond with empty SVG
} // A URL without "?kiwix-display" query string acts as a passthrough so that the regex will not match and
return new Response(svgResponse, { // the image will be fetched by app.js
headers: { // DEV: If you need to hide more image types, add them to regex below and also edit equivalent regex in app.js
'Content-Type': 'image/svg+xml' var svgResponse;
if (imageDisplay === 'manual') {
svgResponse = "<svg xmlns='http://www.w3.org/2000/svg' width='1' height='1'><rect width='1' height='1' style='fill:lightblue'/></svg>";
} else {
svgResponse = "<svg xmlns='http://www.w3.org/2000/svg'/>";
} }
return new Response(svgResponse, {
headers: {
'Content-Type': 'image/svg+xml'
}
});
}
const range = modRequestOrResponse.headers.get('range');
return fetchUrlFromZIM(urlObject, range).then(function (response) {
// // DEV: For normal reads, this is now done in app.js, but for libzim, we have to do it here
// // Add css or js assets to ASSETS_CACHE (or update their cache entries) unless the URL schema is not supported
// if (data && data.origin === 'libzim' && regexpCachedContentTypes.test(response.headers.get('Content-Type')) &&
// !regexpExcludedURLSchema.test(event.request.url)) {
// event.waitUntil(updateCache(ASSETS_CACHE, rqUrl, response.clone()));
// }
return cacheAndReturnResponseForAsset(event, response);
}).catch(function (msgPortData) {
console.error('Invalid message received from app.js for ' + strippedUrl, msgPortData);
return msgPortData;
});
} else {
// It's not an asset, or it doesn't match a ZIM URL pattern, so we should fetch it with Fetch API
return fetch(modRequestOrResponse).then(function (response) {
// If request was successful, add or update it in the cache, but be careful not to cache the ZIM archive itself!
if (!regexpExcludedURLSchema.test(rqUrl) && !/\.zim\w{0,2}$/i.test(strippedUrl)) {
event.waitUntil(updateCache(APP_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (error) {
console.debug('[SW] Network request failed and no cache.', error);
}); });
} }
return fetchUrlFromZIM(urlObject, range).then(function ({ response, data }) { });
// DEV: For normal reads, this is now done in app.js, but for lizim, we have to do it here
// Add css or js assets to ASSETS_CACHE (or update their cache entries) unless the URL schema is not supported
if (data && data.origin === 'libzim' && regexpCachedContentTypes.test(response.headers.get('Content-Type')) &&
!regexpExcludedURLSchema.test(event.request.url)) {
event.waitUntil(updateCache(ASSETS_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (msgPortData) {
console.error('Invalid message received from app.js for ' + strippedUrl, msgPortData);
return msgPortData;
});
} else {
// It's not an asset, or it doesn't match a ZIM URL pattern, so we should fetch it with Fetch API
return fetch(event.request).then(function (response) {
// If request was successful, add or update it in the cache, but be careful not to cache the ZIM archive itself!
if (!regexpExcludedURLSchema.test(event.request.url) && !/\.zim\w{0,2}$/i.test(strippedUrl)) {
event.waitUntil(updateCache(APP_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (error) {
console.debug('[SW] Network request failed and no cache.', error);
});
}
}) })
); );
}); });
@ -397,6 +458,18 @@ self.addEventListener('message', function (event) {
// Note that this code doesn't currently run because the app currently never sends a 'disable' message // Note that this code doesn't currently run because the app currently never sends a 'disable' message
// This is because the app may be running as a PWA, and still needs to be able to fetch assets even in jQuery mode // This is because the app may be running as a PWA, and still needs to be able to fetch assets even in jQuery mode
fetchCaptureEnabled = false; fetchCaptureEnabled = false;
} else if (/(disable|enable)ReplayWorker/.test(event.data.action)) {
// On 'disableReplayWorker' or 'enableReplayWorker' message, we disable or enable the ReplayWorker
// Note that we set it to null rather than false, as false is reserved for when the ReplayWorker is not available at all
if (isReplayWorkerAvailable !== false) {
if (event.data.action === 'enableReplayWorker') {
isReplayWorkerAvailable = true;
console.debug('[SW] ReplayWorker is enabled');
} else if (event.data.action === 'disableReplayWorker') {
isReplayWorkerAvailable = null;
console.debug('[SW] ReplayWorker is disabled');
}
}
} }
var oldValue; var oldValue;
if (event.data.action.assetsCache) { if (event.data.action.assetsCache) {
@ -420,56 +493,221 @@ self.addEventListener('message', function (event) {
event.ports[0].postMessage({ type: cacheArr[0], name: cacheArr[1], description: cacheArr[2], count: cacheArr[3] }); event.ports[0].postMessage({ type: cacheArr[0], name: cacheArr[1], description: cacheArr[2], count: cacheArr[3] });
}); });
} }
} else if (event.data.msg_type) {
// Messages for the ReplayWorker
if (event.data.msg_type === 'addColl') {
console.debug('[SW] addColl message received from app.js');
if (!self.sw) {
console.error('[SW] Zimit ZIMs in ServiceWorker mode are not supported in this browser');
// Reply to the message port with an error
event.ports[0].postMessage({ error: 'ReplayWorker is unsupported!' });
} else {
event.waitUntil(
self.sw.collections._handleMessage(event).then(function () {
setReplayCollectionAsRoot(event.data.prefix, event.data.name);
// Reply to the message port with a success message
event.ports[0].postMessage({ success: 'ReplayWorker is supported!' });
})
);
}
}
} }
}); });
/** /**
* Handles URLs that need to be extracted from the ZIM archive * Sets a Replay collection as the root configuration, so that the Replay Worker will deal correctly with requests to the collection
* *
* @param {URL} urlObject The URL object to be processed for extraction from the ZIM * @param {String} prefix The URL prefix where assets are loaded, consisting of the local path to the ZIM file plus the namespace
* @param {String} range Optional byte range string * @param {String} name The name of the ZIM file (without any extension), used as the Replay root
*/
function setReplayCollectionAsRoot (prefix, name) {
    // Both arguments are used to build paths and to index into the collections object,
    // so anything that is not a string is refused outright
    const argsAreStrings = typeof prefix === 'string' && typeof name === 'string';
    if (!argsAreStrings) {
        console.error('Invalid prefix or name');
        return;
    }
    // Refuse keys that could be used to reach into an object's prototype chain
    const isUnsafeKey = function (key) {
        return key === '__proto__' || key === 'constructor' || key === 'prototype';
    };
    if (isUnsafeKey(prefix) || isUnsafeKey(name)) {
        console.error('Potentially dangerous prefix or name');
        return;
    }
    // Point every prefix known to the Replay Worker at the given location
    const replay = self.sw;
    replay.prefix = prefix;
    replay.replayPrefix = prefix;
    replay.distPrefix = prefix + 'dist/';
    replay.apiPrefix = prefix + 'api/';
    replay.staticPrefix = prefix + 'static/';
    replay.api.collections.prefixes = {
        main: replay.prefix,
        root: replay.prefix,
        static: replay.staticPrefix
    };
    // DEV: The keys of the staticData Map are deliberately NOT rewritten to match the new static prefix,
    // because rebuilding the Map means moving a lot of static data around. Instead, zimitResolver()
    // derives the key to look up from the request URL.
    const collection = replay.collections.colls[name];
    if (!collection) return;
    // The collection is loaded, so update its own prefixes and make it the root
    collection.prefix = replay.prefix;
    collection.rootPrefix = replay.prefix;
    collection.staticPrefix = replay.staticPrefix;
    replay.collections.root = name;
}
/**
* Handles resolving content for Zimit-style ZIM archives
*
* @param {FetchEvent} event The FetchEvent to be processed
* @returns {Promise<Response>} A Promise for the Response, or rejects with the invalid message port data * @returns {Promise<Response>} A Promise for the Response, or rejects with the invalid message port data
*/ */
function fetchUrlFromZIM (urlObject, range) { function zimitResolver (event) {
var rqUrl = event.request.url;
var zimStem = rqUrl.replace(/^.*?\/([^/]+?)\.zim\w?\w?\/.*/, '$1');
if (/\/A\/load\.js$/.test(rqUrl)) {
// If the request is for load.js, we should filter its contents to load the mainUrl, as we don't need the other stuff
// concerning registration of the ServiceWorker and postMessage handling
console.debug('[SW] Filtering content of load.js', rqUrl);
// First we have to get the contents of load.js from the ZIM, because it is a common name, and there is no way to be sure
// that the request will be for the Zimit load.js
return fetchUrlFromZIM(new URL(rqUrl)).then(function (response) {
// The response was found in the ZIM so we respond with it
// Clone the response before reading its body
var clonedResponse = response.clone();
return response.text().then(function (contents) {
// We need to replace the entire contents with a single function that loads mainUrl
if (/\.register\([^;]+?sw\.js\?replayPrefix/.test(contents)) {
var newContents = "window.location.href = window.location.href.replace(/index\\.html/, window.mainUrl.replace('https://', ''));";
var responseLoadJS = contsructResponse(newContents, 'text/javascript');
return responseLoadJS;
} else {
// The contents of load.js are not as expected, so we should return the original response
return clonedResponse;
}
});
});
// Check that the requested URL is for a ZIM that we already have loaded
} else if (zimStem !== rqUrl && isReplayWorkerAvailable) {
// Wait for the ReplayWorker to initialize and reload all collections
return replayCollectionsReloaded.then(function () {
if (self.sw.collections.colls && self.sw.collections.colls[zimStem]) {
if (self.sw.collections.root !== zimStem) {
setReplayCollectionAsRoot(self.sw.collections.colls[zimStem].config.sourceUrl, zimStem);
}
if (/\/A\/static\//.test(rqUrl)) {
// If the request is for static data from the replayWorker, we should get them from the Worker's cache
// DEV: This extracts both wombat.js and wombatWorkers.js from the staticData Map
var staticDataUrl = rqUrl.replace(/^(.*?\/)[^/]+?\.zim\w?\w?\/[AC/]{2,4}(.*)/, '$1$2')
if (self.sw.staticData) {
var staticData = self.sw.staticData.get(staticDataUrl);
if (staticData) {
console.debug('[SW] Returning static data from ReplayWorker', rqUrl);
// Construct a new Response with headers to return the static data
var responseStaticData = contsructResponse(staticData.content, staticData.type);
return Promise.resolve(responseStaticData);
} else {
// Return a 404 response
return Promise.resolve(new Response('', { status: 404, statusText: 'Not Found' }));
}
}
} else {
// console.debug('[SW] Asking ReplayWorker to handleFetch', rqUrl);
return self.sw.handleFetch(event);
}
} else {
// The requested ZIM is not loaded, or it is a regular non-Zimit request
return event.request;
}
});
} else {
// The loaded ZIM archive is not a Zimit archive, or sw-Zimit is unsupported, so we should just return the request
return Promise.resolve(event.request);
}
}
/**
 * Constructs a 200 OK Response for the given content, with Content-Type and Content-Length headers
 *
 * @param {String|ArrayBuffer|ArrayBufferView|Blob} content The body of the Response
 * @param {String} contentType The value to use for the Content-Type header
 * @returns {Response} A Response wrapping the content
 */
function contsructResponse (content, contentType) {
    // Content-Length must be the size of the body in bytes. For strings, the number of UTF-16 code units
    // (String.length) differs from the encoded size as soon as the text contains non-ASCII characters,
    // so we measure the UTF-8 size via a Blob. Binary types report their size via byteLength or size.
    var byteLength;
    if (typeof content === 'string') {
        byteLength = new Blob([content]).size;
    } else if (content && typeof content.byteLength === 'number') {
        byteLength = content.byteLength;
    } else if (content && typeof content.size === 'number') {
        byteLength = content.size;
    } else if (content && typeof content.length === 'number') {
        byteLength = content.length;
    }
    var headers = new Headers();
    // Only advertise a length when we could actually determine one
    if (typeof byteLength === 'number') headers.set('Content-Length', byteLength);
    headers.set('Content-Type', contentType);
    var responseInit = {
        status: 200,
        statusText: 'OK',
        headers: headers
    };
    return new Response(content, responseInit);
}
/**
 * Stores a copy of an asset's Response in ASSETS_CACHE when its Content-Type is one of the cacheable
 * types and the request URL does not use an excluded schema, then hands the Response back unchanged.
 * Do not use this for non-asset requests!
 *
 * @param {FetchEvent} event The FetchEvent whose request URL is used as the cache key
 * @param {Response} response The Response to cache (if eligible) and return
 * @returns {Response} The same Response that was passed in
 */
function cacheAndReturnResponseForAsset (event, response) {
    var mimeType = response.headers.get('Content-Type');
    if (!regexpCachedContentTypes.test(mimeType)) return response;
    var requestUrl = event.request.url;
    if (regexpExcludedURLSchema.test(requestUrl)) return response;
    // Cache a clone, so that the body of the Response we return is still readable;
    // waitUntil is given the Promise returned by updateCache
    var cacheUpdate = updateCache(ASSETS_CACHE, requestUrl, response.clone());
    event.waitUntil(cacheUpdate);
    return response;
}
/**
* Handles URLs that need to be extracted from the ZIM archive. They can be strings or URL objects, and should be URI encoded.
*
* @param {URL|String} urlObjectOrString The URL object, or a simple string representation, to be processed for extraction from the ZIM
* @param {String} range Optional byte range string (mostly used for video or audio streams)
* @param {String} expectedHeaders Optional comma-separated list of headers to be expected in the response (for error checking). Note that although
* Zimit requests may be for a range of bytes, in fact video (at least) is stored as a blob, so the appropriate response will just be a normal 200.
* @returns {Promise<Response>} A Promise for the Response, or rejects with the invalid message port data
*/
function fetchUrlFromZIM (urlObjectOrString, range, expectedHeaders) {
return new Promise(function (resolve, reject) { return new Promise(function (resolve, reject) {
var pathname = typeof urlObjectOrString === 'string' ? urlObjectOrString : urlObjectOrString.pathname;
// Note that titles may contain bare question marks or hashes, so we must use only the pathname without any URL parameters. // Note that titles may contain bare question marks or hashes, so we must use only the pathname without any URL parameters.
// Be sure that you haven't encoded any querystring along with the URL. // Be sure that you haven't encoded any querystring along with the URL (Zimit files, however, require encoding of the querystring)
var barePathname = decodeURIComponent(urlObject.pathname); var barePathname = decodeURIComponent(pathname);
var partsOfZIMUrl = regexpZIMUrlWithNamespace.exec(barePathname); var partsOfZIMUrl = regexpZIMUrlWithNamespace.exec(barePathname);
var prefix = partsOfZIMUrl ? partsOfZIMUrl[1] : ''; var prefix = partsOfZIMUrl ? partsOfZIMUrl[1] : '';
var nameSpace = partsOfZIMUrl ? partsOfZIMUrl[2] : ''; var nameSpace = partsOfZIMUrl ? partsOfZIMUrl[2] : '';
var title = partsOfZIMUrl ? partsOfZIMUrl[3] : barePathname; var title = partsOfZIMUrl ? partsOfZIMUrl[3] : barePathname;
var anchorTarget = urlObject.hash.replace(/^#/, ''); var anchorTarget = '';
var uriComponent = urlObject.search.replace(/\?kiwix-display/, ''); var uriComponent = '';
if (typeof urlObjectOrString === 'object') {
anchorTarget = urlObjectOrString.hash.replace(/^#/, '');
uriComponent = urlObjectOrString.search.replace(/\?kiwix-display/, '');
}
var titleWithNameSpace = nameSpace + '/' + title; var titleWithNameSpace = nameSpace + '/' + title;
var zimName = prefix.replace(/\/$/, ''); var zimName = prefix.replace(/\/$/, '');
// console.debug('[SW] Asking app.js for ' + titleWithNameSpace + ' from ' + zimName + '...');
var messageListener = function (msgPortEvent) { var messageListener = function (msgPortEvent) {
if (msgPortEvent.data.action === 'giveContent') { if (msgPortEvent.data.action === 'giveContent') {
// Content received from app.js // Content received from app.js (note that null indicates that the content was not found in the ZIM)
var contentLength = msgPortEvent.data.content ? (msgPortEvent.data.content.byteLength || msgPortEvent.data.content.length) : null; var contentLength = msgPortEvent.data.content !== null ? (msgPortEvent.data.content.byteLength || msgPortEvent.data.content.length) : null;
var contentType = msgPortEvent.data.mimetype; var contentType = msgPortEvent.data.mimetype;
var zimType = msgPortEvent.data.zimType;
// Set the imageDisplay variable if it has been sent in the event data // Set the imageDisplay variable if it has been sent in the event data
imageDisplay = typeof msgPortEvent.data.imageDisplay !== 'undefined' imageDisplay = typeof msgPortEvent.data.imageDisplay !== 'undefined'
? msgPortEvent.data.imageDisplay : imageDisplay; ? msgPortEvent.data.imageDisplay : imageDisplay;
var headers = new Headers(); var headers = new Headers();
if (contentLength) headers.set('Content-Length', contentLength); if (contentLength !== null) headers.set('Content-Length', contentLength);
// Prevent CORS issues in PWAs - not needed and insecure!
// headers.set('Access-Control-Allow-Origin', '*');
// Set Content-Security-Policy to sandbox the content (prevent XSS attacks from malicious ZIMs) // Set Content-Security-Policy to sandbox the content (prevent XSS attacks from malicious ZIMs)
headers.set('Content-Security-Policy', "default-src 'self' data: blob: about: chrome-extension: bingmaps: https://pwa.kiwix.org https://kiwix.github.io 'unsafe-inline' 'unsafe-eval'; sandbox allow-scripts allow-same-origin allow-modals allow-popups allow-forms allow-downloads;"); headers.set('Content-Security-Policy', "default-src 'self' data: file: blob: about: chrome-extension: bingmaps: https://pwa.kiwix.org https://kiwix.github.io 'unsafe-inline' 'unsafe-eval'; sandbox allow-scripts allow-same-origin allow-modals allow-popups allow-forms allow-downloads;");
headers.set('Referrer-Policy', 'no-referrer'); headers.set('Referrer-Policy', 'no-referrer');
if (contentType) headers.set('Content-Type', contentType); if (contentType) headers.set('Content-Type', contentType);
// Test if the content is a video or audio file. In this case, Chrome & Edge need us to support ranges. // Test if the content is a video or audio file. In this case, Chrome & Edge need us to support ranges.
// NB, the Replay Worker adds its own Accept-Ranges header, so we don't add it here for such requests.
// See kiwix-js #519 and openzim/zimwriterfs #113 for why we test for invalid types like "mp4" or "webm" (without "video/") // See kiwix-js #519 and openzim/zimwriterfs #113 for why we test for invalid types like "mp4" or "webm" (without "video/")
// The full list of types produced by zimwriterfs is in https://github.com/openzim/zimwriterfs/blob/master/src/tools.cpp // The full list of types produced by zimwriterfs is in https://github.com/openzim/zimwriterfs/blob/master/src/tools.cpp
if (contentLength >= 1 && /^(video|audio)|(^|\/)(mp4|webm|og[gmv]|mpeg)$/i.test(contentType)) { if (zimType !== 'zimit' && contentLength >= 1 && /^(video|audio)|(^|\/)(mp4|webm|og[gmv]|mpeg)$/i.test(contentType)) {
headers.set('Accept-Ranges', 'bytes'); headers.set('Accept-Ranges', 'bytes');
} }
var slicedData = msgPortEvent.data.content; var slicedData = msgPortEvent.data.content;
if (range) {
if (range && zimType === 'zimit') {
headers.set('Content-Range', range + '/*');
} else if (range && slicedData !== null) {
// The browser asks for a range of bytes (usually for a video or audio stream) // The browser asks for a range of bytes (usually for a video or audio stream)
// In this case, we partially honor the request: if it asks for offsets x to y, // In this case, we partially honor the request: if it asks for offsets x to y,
// we send partial contents starting at x offset, till the end of the data (ignoring y offset) // we send partial contents starting at x offset, till the end of the data (ignoring y offset)
@ -487,18 +725,33 @@ function fetchUrlFromZIM (urlObject, range) {
} }
var responseInit = { var responseInit = {
// HTTP status is usually 200, but has to bee 206 when partial content (range) is sent // HTTP status is usually 200, but has to be 206 when partial content (range) is sent
status: range ? 206 : 200, status: range ? 206 : 200,
statusText: 'OK', statusText: 'OK',
headers: headers headers: headers
}; };
// Deal with a not-found dirEntry
if (slicedData === null) {
responseInit.status = 404;
responseInit.statusText = 'Not Found';
}
if (slicedData === null) slicedData = '';
// if (expectedHeaders) {
// console.debug('[SW] Expected headers were', Object.fromEntries(expectedHeaders));
// console.debug('[SW] Constructed headers are', Object.fromEntries(headers));
// }
var httpResponse = new Response(slicedData, responseInit); var httpResponse = new Response(slicedData, responseInit);
// Let's send the content back from the ServiceWorker // Let's send the content back from the ServiceWorker
resolve({ response: httpResponse, data: msgPortEvent.data }); // resolve({ response: httpResponse, data: msgPortEvent.data });
resolve(httpResponse);
} else if (msgPortEvent.data.action === 'sendRedirect') { } else if (msgPortEvent.data.action === 'sendRedirect') {
resolve({ response: Response.redirect(prefix + msgPortEvent.data.redirectUrl) }); console.debug('[SW] Redirecting to ' + msgPortEvent.data.redirectUrl);
// resolve({ response: Response.redirect(prefix + msgPortEvent.data.redirectUrl) });
resolve(Response.redirect(prefix + msgPortEvent.data.redirectUrl));
} else { } else {
reject(msgPortEvent.data, titleWithNameSpace); reject(msgPortEvent.data, titleWithNameSpace);
} }

View File

@ -1231,11 +1231,16 @@
<option id="libzimDisable" value="disable">Disable</option> <option id="libzimDisable" value="disable">Disable</option>
</select> </select>
<p><i>Warning! Disables OS/ZIM checks and may make title search and article reading unresponsive!</i></p> <p><i>Warning! Disables OS/ZIM checks and may make title search and article reading unresponsive!</i></p>
<label class="checkbox"> <label class="checkbox" title="This is highly experimental and unstable. It also currently does not work with Zimit archives using the Replay Worker.">
<input type="checkbox" name="useLibzimReader" id="useLibzimReaderCheck"> <input type="checkbox" name="useLibzimReader" id="useLibzimReaderCheck">
<span class="checkmark"></span> <span class="checkmark"></span>
<b>Also use selected libzim for reading ZIM content</b> (<i>unstable</i>) <b>Also use selected libzim for reading ZIM content</b> (<i>unstable</i>)
</label> </label>
<label class="checkbox" title="The app automatically falls back to legacy support if the Replay system cannot run, but this option forces the app to use that mode. Primarily for developers maintaining the legacy code.">
<input type="checkbox" name="useLegacyZimitSupport" id="useLegacyZimitSupportCheck">
<span class="checkmark"></span>
<b>Use legacy method for reading Zimit-based archives</b> (may be faster on some systems)
</label>
<label class="checkbox" title="In some browsers on some platforms, drag-and-drop may malfunction and make it difficult to select text, and other operations. Disable it here if it is causing issues." > <label class="checkbox" title="In some browsers on some platforms, drag-and-drop may malfunction and make it difficult to select text, and other operations. Disable it here if it is causing issues." >
<input type="checkbox" name="disableDragAndDrop" id="disableDragAndDropCheck"> <input type="checkbox" name="disableDragAndDrop" id="disableDragAndDropCheck">
<span class="checkmark"></span> <span class="checkmark"></span>

File diff suppressed because it is too large Load Diff

View File

@ -109,6 +109,7 @@ params['windowOpener'] = getSetting('windowOpener'); // 'tab|window|false' A set
params['rightClickType'] = getSetting('rightClickType'); // 'single|double|false' A setting that determines whether a single or double right-click is used to open a new window/tab params['rightClickType'] = getSetting('rightClickType'); // 'single|double|false' A setting that determines whether a single or double right-click is used to open a new window/tab
params['navButtonsPos'] = getSetting('navButtonsPos') || 'bottom'; // 'top|bottom' A setting that determines where the back-forward nav buttons appear params['navButtonsPos'] = getSetting('navButtonsPos') || 'bottom'; // 'top|bottom' A setting that determines where the back-forward nav buttons appear
params['useOPFS'] = getSetting('useOPFS') === true; // A setting that determines whether to use OPFS (experimental) params['useOPFS'] = getSetting('useOPFS') === true; // A setting that determines whether to use OPFS (experimental)
params['useLegacyZimitSupport'] = getSetting('useLegacyZimitSupport') === true; // A setting that determines whether to force the use of legacy Zimit support
// Do not touch these values unless you know what they do! Some are global variables, some are set programmatically // Do not touch these values unless you know what they do! Some are global variables, some are set programmatically
params['cacheAPI'] = 'kiwixjs-assetsCache'; // Set the global Cache API database or cache name here, and synchronize with Service Worker params['cacheAPI'] = 'kiwixjs-assetsCache'; // Set the global Cache API database or cache name here, and synchronize with Service Worker
@ -250,6 +251,7 @@ document.getElementById('rememberLastPageCheck').checked = params.rememberLastPa
document.getElementById('displayFileSelectorsCheck').checked = params.showFileSelectors; document.getElementById('displayFileSelectorsCheck').checked = params.showFileSelectors;
document.getElementById('hideActiveContentWarningCheck').checked = params.hideActiveContentWarning; document.getElementById('hideActiveContentWarningCheck').checked = params.hideActiveContentWarning;
document.getElementById('useLibzimReaderCheck').checked = params.useLibzim; document.getElementById('useLibzimReaderCheck').checked = params.useLibzim;
document.getElementById('useLegacyZimitSupportCheck').checked = params.useLegacyZimitSupport;
document.getElementById('alphaCharTxt').value = params.alphaChar; document.getElementById('alphaCharTxt').value = params.alphaChar;
document.getElementById('omegaCharTxt').value = params.omegaChar; document.getElementById('omegaCharTxt').value = params.omegaChar;
document.getElementById('titleSearchRange').value = params.maxSearchResultsSize; document.getElementById('titleSearchRange').value = params.maxSearchResultsSize;

View File

@ -463,11 +463,10 @@ function getItemFromCacheOrZIM (selectedArchive, key, dirEntry) {
return; return;
} }
// Bypass getting dirEntry if we already have it // Bypass getting dirEntry if we already have it
var getDirEntry = dirEntry ? Promise.resolve() var getDirEntry = dirEntry ? function () { return Promise.resolve(dirEntry); }
: selectedArchive.getDirEntryByPath(title); : selectedArchive['getDirEntryByPath'];
// Read data from ZIM // Read data from ZIM
getDirEntry.then(function (resolvedDirEntry) { getDirEntry(title).then(function (resolvedDirEntry) {
if (dirEntry) resolvedDirEntry = dirEntry;
if (resolvedDirEntry === null) { if (resolvedDirEntry === null) {
console.log('Error: asset file not found: ' + title); console.log('Error: asset file not found: ' + title);
resolve(null); resolve(null);
@ -489,7 +488,7 @@ function getItemFromCacheOrZIM (selectedArchive, key, dirEntry) {
} }
// Set the read function to use according to filetype // Set the read function to use according to filetype
var readFile = /\b(?:x?html|css|javascript)\b/i.test(mimetype) var readFile = /\b(?:x?html|css|javascript)\b/i.test(mimetype)
? selectedArchive.readUtf8File : selectedArchive.readBinaryFile; ? selectedArchive['readUtf8File'] : selectedArchive['readBinaryFile'];
readFile(resolvedDirEntry, function (fileDirEntry, content) { readFile(resolvedDirEntry, function (fileDirEntry, content) {
if (!fileDirEntry && !content) { if (!fileDirEntry && !content) {
console.warn('Could not read asset ' + title); console.warn('Could not read asset ' + title);

View File

@ -80,6 +80,13 @@ function extractImages (images, callback) {
} else { image.setAttribute('data-kiwixsrc', imageUrl); } } else { image.setAttribute('data-kiwixsrc', imageUrl); }
image.removeAttribute('data-kiwixurl'); image.removeAttribute('data-kiwixurl');
var title = decodeURIComponent(imageUrl); var title = decodeURIComponent(imageUrl);
// Get any data-kiwixsrcset
var srcset = image.getAttribute('data-kiwixsrcset');
var srcsetArr = [];
if (srcset) {
// We need to get the array of images in the srcset
srcsetArr = srcset.split(',');
}
extractorBusy++; extractorBusy++;
if (/^data:image\/webp/i.test(imageUrl)) { if (/^data:image\/webp/i.test(imageUrl)) {
image.style.transition = 'opacity 0.3s ease-in'; image.style.transition = 'opacity 0.3s ease-in';
@ -118,7 +125,39 @@ function extractImages (images, callback) {
image.style.background = ''; image.style.background = '';
var mimetype = dirEntry.getMimetype(); var mimetype = dirEntry.getMimetype();
uiUtil.feedNodeWithBlob(image, 'src', content, mimetype, params.manipulateImages || params.allowHTMLExtraction, function () { uiUtil.feedNodeWithBlob(image, 'src', content, mimetype, params.manipulateImages || params.allowHTMLExtraction, function () {
checkBatch(); if (srcsetArr.length) {
// We need to process each image in the srcset
// Empty or make a new srcset
image.srcset = '';
var srcsetCount = srcsetArr.length;
srcsetArr.forEach(function (imgAndResolutionUrl) {
srcsetCount--;
// Get the url and the resolution from the srcset entry
var urlMatch = imgAndResolutionUrl.match(/^\s*([^\s]+)\s+([0-9.]+\w+)\s*$/);
var url = urlMatch ? urlMatch[1] : '';
var resolution = urlMatch ? urlMatch[2]: '';
appstate.selectedArchive.getDirEntryByPath(url).then(function (srcEntry) {
appstate.selectedArchive.readBinaryFile(srcEntry, function (fileDirEntry, content) {
var mimetype = srcEntry.getMimetype();
uiUtil.getDataUriFromUint8Array(content, mimetype).then(function (dataUri) {
// Add the dataUri to the srcset
image.srcset += (image.srcset ? ', ' : '') + dataUri + ' ' + resolution;
if (srcsetCount === 0) {
checkBatch();
}
}).catch(function (e) {
console.error('Could not get dataUri for image:' + url, e);
if (srcsetCount === 0) checkBatch();
});
});
}).catch(function (e) {
console.error('Could not find DirEntry for image:' + url, e);
if (srcsetCount === 0) checkBatch();
});
});
} else {
checkBatch();
}
}); });
image.style.transition = 'opacity 0.3s ease-in'; image.style.transition = 'opacity 0.3s ease-in';
image.style.opacity = '1'; image.style.opacity = '1';
@ -271,7 +310,7 @@ function prepareImagesServiceWorker (win, forPrinting) {
documentImages[i].style.opacity = '0'; documentImages[i].style.opacity = '0';
} }
if (params.manipulateImages || params.allowHTMLExtraction) { if (params.manipulateImages || params.allowHTMLExtraction) {
documentImages[i].outerHTML = documentImages[i].outerHTML.replace(params.regexpTagsWithZimUrl, function (match, blockStart, equals, quote, relAssetUrl, blockEnd) { documentImages[i].outerHTML = documentImages[i].outerHTML.replace(params.regexpTagsWithZimUrl, function (match, blockStart, equals, quote, relAssetUrl, querystring, blockEnd) {
var parameters = relAssetUrl.replace(/^[^?]+/, ''); var parameters = relAssetUrl.replace(/^[^?]+/, '');
var assetZIMUrlEnc; var assetZIMUrlEnc;
if (params.zimType === 'zimit' && !relAssetUrl.indexOf(indexRoot)) { if (params.zimType === 'zimit' && !relAssetUrl.indexOf(indexRoot)) {
@ -316,7 +355,7 @@ function prepareImagesServiceWorker (win, forPrinting) {
function prepareImagesJQuery (win, forPrinting) { function prepareImagesJQuery (win, forPrinting) {
container = win; container = win;
var doc = container.document; var doc = container.document;
var documentImages = doc.querySelectorAll('img[data-kiwixurl], video, audio'); var documentImages = doc.querySelectorAll('img[data-kiwixurl], img[data-kiwixsrcset], video, audio');
var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/'; var indexRoot = window.location.pathname.replace(/[^/]+$/, '') + encodeURI(appstate.selectedArchive.file.name) + '/';
indexRoot = indexRoot.replace(/^\//, ''); indexRoot = indexRoot.replace(/^\//, '');
// Zimit ZIMs work better if all images are extracted // Zimit ZIMs work better if all images are extracted

View File

@ -174,12 +174,13 @@ function transformReplayUrls (dirEntry, data, mimetype) {
// Deal with image srcsets // Deal with image srcsets
data = data.replace(/<img\b[^>]+srcset=["']([^"']+)/ig, function (match, srcset) { data = data.replace(/<img\b[^>]+srcset=["']([^"']+)/ig, function (match, srcset) {
var srcsetArr = srcset.split(','); var srcsetArr = srcset.split(',');
var swPrefix = params.contentInjectionMode === 'serviceworker' ? indexRoot + '/' : '';
for (var i = 0; i < srcsetArr.length; i++) { for (var i = 0; i < srcsetArr.length; i++) {
// For root-relative links, we need to add the zimitPrefix // For root-relative links, we need to add the zimitPrefix
srcsetArr[i] = srcsetArr[i].replace(/^\s?\/(?!\/)/, indexRoot + '/' + dirEntry.namespace + '/' + params.zimitPrefix + '/'); srcsetArr[i] = srcsetArr[i].replace(/^\s*\/(?!\/)/, swPrefix + dirEntry.namespace + '/' + params.zimitPrefix + '/');
// Zimit prefix is in the URL for absolute URLs // Zimit prefix is in the URL for absolute URLs
srcsetArr[i] = srcsetArr[i].replace(/^(?:\s?https?:)?\/\//i, indexRoot + '/' + dirEntry.namespace + '/' + (dirEntry.namespace === 'C' ? 'A/' : '')); srcsetArr[i] = srcsetArr[i].replace(/^(?:\s*https?:)?\/\//i, swPrefix + dirEntry.namespace + '/' + (dirEntry.namespace === 'C' ? 'A/' : ''));
if (rootDirectory) srcsetArr[i] = srcsetArr[i].replace(/^(\.\.\/?)+/, indexRoot + '/' + dirEntry.namespace + '/' + params.zimitPrefix + '/'); if (rootDirectory) srcsetArr[i] = srcsetArr[i].replace(/^(\.\.\/?)+/, swPrefix + dirEntry.namespace + '/' + params.zimitPrefix + '/');
srcsetArr[i] = '@kiwixtransformed@' + srcsetArr[i]; srcsetArr[i] = '@kiwixtransformed@' + srcsetArr[i];
} }
match = match.replace(srcset, srcsetArr.join(', ')); match = match.replace(srcset, srcsetArr.join(', '));

View File

@ -1,22 +1,22 @@
/** /**
* uiUtil.js : Utility functions for the User Interface * uiUtil.js : Utility functions for the User Interface
* *
* Copyright 2013-2023 Mossroy, Jaifroid and contributors * Copyright 2013-2024 Mossroy, Jaifroid and contributors
* License GPL v3: * Licence GPL v3:
* *
* This file is part of Kiwix. * This file is part of Kiwix.
* *
* Kiwix is free software: you can redistribute it and/or modify * Kiwix is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public Licence as published by
* the Free Software Foundation, either version 3 of the License, or * the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version. * (at your option) any later version.
* *
* Kiwix is distributed in the hope that it will be useful, * Kiwix is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details. * GNU General Public Licence for more details.
* *
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public Licence
* along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/> * along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
*/ */
@ -32,9 +32,10 @@ import util from './util.js';
*/ */
var itemsCount = false; var itemsCount = false;
// Placeholders for the header and footer // Placeholders for the articleContainer, header and footer
const header = document.getElementById('top'); const header = document.getElementById('top');
const footer = document.getElementById('footer'); const footer = document.getElementById('footer');
let articleContainer = document.getElementById('articleContent');
/** /**
* Hides slide-away UI elements * Hides slide-away UI elements
@ -80,7 +81,12 @@ let scrollThrottle = false;
* Launcher for the slide-away function, including a throttle to prevent it being called too often * Launcher for the slide-away function, including a throttle to prevent it being called too often
*/ */
function scroller (e) { function scroller (e) {
const articleContainer = document.getElementById('articleContent'); // We have to refresh the articleContainer when the window changes
articleContainer = document.getElementById('articleContent');
// Get the replay_iframe if it exists
if (articleContainer.contentWindow && articleContainer.contentWindow.document && articleContainer.contentWindow.document.getElementById('replay_iframe')) {
articleContainer = articleContainer.contentWindow.document.getElementById('replay_iframe');
}
if (scrollThrottle) return; if (scrollThrottle) return;
// windowIsScrollable gets set and reset in slideAway() // windowIsScrollable gets set and reset in slideAway()
if (windowIsScrollable && e.type === 'wheel') return; if (windowIsScrollable && e.type === 'wheel') return;
@ -123,7 +129,6 @@ let windowIsScrollable = false;
// Slides away or restores the header and footer // Slides away or restores the header and footer
function slideAway (e) { function slideAway (e) {
const articleContainer = document.getElementById('articleContent');
const newScrollY = articleContainer.contentWindow.pageYOffset; const newScrollY = articleContainer.contentWindow.pageYOffset;
let delta; let delta;
const visibleState = /\(0p?x?\)/.test(header.style.transform); const visibleState = /\(0p?x?\)/.test(header.style.transform);
@ -208,21 +213,20 @@ function feedNodeWithBlob (node, nodeAttribute, content, mimeType, makeDataURI,
if (callback) callback(); if (callback) callback();
}); });
} else { } else {
var blob = new Blob([content], { type: mimeType });
var url; var url;
if (makeDataURI) { if (makeDataURI) {
// Because btoa fails on utf8 strings (in SVGs, for example) we need to use FileReader method // Because btoa fails on utf8 strings (in SVGs, for example) we need to use FileReader method
// See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem // See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem
// url = 'data:' + mimeType + ';base64,' + btoa(util.uintToString(content)); // url = 'data:' + mimeType + ';base64,' + btoa(util.uintToString(content));
var myReader = new FileReader(); getDataUriFromUint8Array(content, mimeType).then(function (uri) {
myReader.onloadend = function () { node.setAttribute(nodeAttribute, uri);
url = myReader.result; if (callback) callback(uri);
node.setAttribute(nodeAttribute, url); }).catch(function (err) {
if (callback) callback(url); console.error('There was an error converting binary content to data URI', err);
}; if (callback) callback(null);
myReader.readAsDataURL(blob); });
} else { } else {
blob = new Blob([content], { var blob = new Blob([content], {
type: mimeType type: mimeType
}); });
// Establish the current window (avoids having to pass it to this function) // Establish the current window (avoids having to pass it to this function)
@ -237,6 +241,29 @@ function feedNodeWithBlob (node, nodeAttribute, content, mimeType, makeDataURI,
} }
} }
/**
 * Converts binary content into a base64-encoded data: URI
 *
 * The conversion is delegated to the browser's FileReader rather than btoa, because btoa
 * fails on UTF-8 strings (as found in SVGs, for example)
 * See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem
 * This native browser method is very fast: see https://stackoverflow.com/a/66046176/9727685
 * @param {Uint8Array} content The binary content to encode
 * @param {String} mimeType The MIME type to declare for the content
 * @returns {Promise<String>} A promise that resolves to the data URI, or rejects with the reader's error event
 */
function getDataUriFromUint8Array (content, mimeType) {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        // Failure path: hand the error event straight to the caller
        reader.onerror = (errorEvent) => {
            reject(errorEvent);
        };
        // loadend fires once reading has finished; the reader's result holds the data URI
        reader.onloadend = () => {
            resolve(reader.result);
        };
        const blob = new Blob([content], { type: mimeType });
        reader.readAsDataURL(blob);
    });
}
/** /**
* Removes parameters and anchors from a URL * Removes parameters and anchors from a URL
* @param {type} url The URL to be processed * @param {type} url The URL to be processed
@ -377,9 +404,7 @@ function clearSpinner () {
cachingAssets.style.display = 'none'; cachingAssets.style.display = 'none';
} }
function printCustomElements () { function printCustomElements (innerDocument) {
// var innerDocument = window.frames[0].frameElement.contentDocument;
var innerDocument = document.getElementById('articleContent').contentDocument;
// For now, adding a printing stylesheet to a zimit ZIM appears to disable printing of any images! // For now, adding a printing stylesheet to a zimit ZIM appears to disable printing of any images!
if (appstate.wikimediaZimLoaded) { if (appstate.wikimediaZimLoaded) {
// Add any missing classes // Add any missing classes
@ -424,7 +449,7 @@ function printCustomElements () {
// Using @media print on images doesn't get rid of them all, so use brute force // Using @media print on images doesn't get rid of them all, so use brute force
if (!document.getElementById('printImageCheck').checked) { if (!document.getElementById('printImageCheck').checked) {
innerDocument.body.innerHTML = innerDocument.body.innerHTML.replace(/<img\b[^>]*>\s*/ig, ''); innerDocument.body.innerHTML = innerDocument.body.innerHTML.replace(/<img\b[^>]*>\s*/ig, '');
} else { } else if (appstate.selectedArchive.zimType === 'open') {
// Remove any breakout link // Remove any breakout link
innerDocument.body.innerHTML = innerDocument.body.innerHTML.replace(/<img\b[^>]+id="breakoutLink"[^>]*>\s*/, ''); innerDocument.body.innerHTML = innerDocument.body.innerHTML.replace(/<img\b[^>]+id="breakoutLink"[^>]*>\s*/, '');
} }
@ -541,7 +566,7 @@ function displayActiveContentWarning (type) {
// '<strong>' + (params.contentInjectionMode === 'jquery' ? 'Limited Zimit' : 'Experimental') + ' support:</strong> ' + // '<strong>' + (params.contentInjectionMode === 'jquery' ? 'Limited Zimit' : 'Experimental') + ' support:</strong> ' +
(params.contentInjectionMode === 'jquery' ? '<b>Limited Zimit support!</b> Please <a id="swModeLink" href="#contentInjectionModeDiv" ' + (params.contentInjectionMode === 'jquery' ? '<b>Limited Zimit support!</b> Please <a id="swModeLink" href="#contentInjectionModeDiv" ' +
'class="alert-link">switch to Service Worker mode</a> if your platform supports it.<br />' 'class="alert-link">switch to Service Worker mode</a> if your platform supports it.<br />'
: 'Support for <b>Zimit</b> archives is experimental. Audio/video and some dynamic content may fail.<br />') + : 'Legacy support for <b>Zimit</b> archives. Audio/video and some dynamic content may fail.<br />') +
'Start search with <b>.*</b> to match part of a title, type <b><i>space</i></b> for the ZIM Archive Index, or ' + 'Start search with <b>.*</b> to match part of a title, type <b><i>space</i></b> for the ZIM Archive Index, or ' +
'<b><i>space / </i></b> for the URL Index.&nbsp;[<a id="stop" href="#expertSettingsDiv" class="alert-link">Permanently hide</a>]' + '<b><i>space / </i></b> for the URL Index.&nbsp;[<a id="stop" href="#expertSettingsDiv" class="alert-link">Permanently hide</a>]' +
'</div>'; '</div>';
@ -1391,6 +1416,7 @@ export default {
systemAlert: systemAlert, systemAlert: systemAlert,
showUpgradeReady: showUpgradeReady, showUpgradeReady: showUpgradeReady,
feedNodeWithBlob: feedNodeWithBlob, feedNodeWithBlob: feedNodeWithBlob,
getDataUriFromUint8Array: getDataUriFromUint8Array,
deriveZimUrlFromRelativeUrl: deriveZimUrlFromRelativeUrl, deriveZimUrlFromRelativeUrl: deriveZimUrlFromRelativeUrl,
getClosestMatchForTagname: getClosestMatchForTagname, getClosestMatchForTagname: getClosestMatchForTagname,
removeUrlParameters: removeUrlParameters, removeUrlParameters: removeUrlParameters,

View File

@ -20,7 +20,7 @@
* along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/> * along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
*/ */
/* global fs, params */ /* global fs, params, appstate */
'use strict'; 'use strict';
@ -484,6 +484,9 @@ function Hilitor (node, tag) {
var nodeText = subNodes.join(' '); var nodeText = subNodes.join(' ');
if (testInput.test(nodeText)) { if (testInput.test(nodeText)) {
var iframeWindow = document.getElementById('articleContent').contentWindow; var iframeWindow = document.getElementById('articleContent').contentWindow;
if (appstate.isReplayWorkerAvailable) {
iframeWindow = document.getElementById('articleContent').contentDocument.getElementById('replay_iframe').contentWindow;
}
var scrollOffset = hilitedNodes[start].offsetTop - window.innerHeight / 4; var scrollOffset = hilitedNodes[start].offsetTop - window.innerHeight / 4;
if ('scrollBehavior' in document.documentElement.style) { if ('scrollBehavior' in document.documentElement.style) {
iframeWindow.scrollTo({ iframeWindow.scrollTo({

View File

@ -83,9 +83,6 @@ function ZIMArchive (storage, path, callbackReady, callbackError) {
that.addMetadataToZIMFile('Language') that.addMetadataToZIMFile('Language')
]).then(function () { ]).then(function () {
console.debug('ZIMArchive ready, metadata will be added in the background'); console.debug('ZIMArchive ready, metadata will be added in the background');
uiUtil.clearSpinner();
// All listings should be loaded, so we can now call the callback
callbackReady(that);
// Add non-time-critical metadata to archive in background so as not to delay opening of the archive // Add non-time-critical metadata to archive in background so as not to delay opening of the archive
// DEV: Note that it does not make sense to extract illustration (icon) metadata here. Instead, if you implement use of the illustration // DEV: Note that it does not make sense to extract illustration (icon) metadata here. Instead, if you implement use of the illustration
// metadata as icons for the loaded ZIM [kiwix-js #886], you should simply use the ZIMArchive.getMetadata() function when needed // metadata as icons for the loaded ZIM [kiwix-js #886], you should simply use the ZIMArchive.getMetadata() function when needed
@ -101,6 +98,15 @@ function ZIMArchive (storage, path, callbackReady, callbackError) {
console.debug('ZIMArchive metadata loaded:', that); console.debug('ZIMArchive metadata loaded:', that);
}); });
}, 2000); // DEV: If you need any of the above earlier, you can alter this delay }, 2000); // DEV: If you need any of the above earlier, you can alter this delay
// We need to get the landing page of any Zimit archive opened
if (that.zimType === 'zimit') {
return that.setZimitMetadata().then(function () {
callbackReady(that);
});
} else {
// All listings should be loaded, so we can now call the callback
callbackReady(that);
}
}); });
}; };
var createZimfile = function (fileArray) { var createZimfile = function (fileArray) {
@ -270,7 +276,7 @@ ZIMArchive.prototype.getMainPageDirEntry = function (callback) {
var that = this; var that = this;
this.file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) { this.file.dirEntryByUrlIndex(mainPageUrlIndex).then(function (dirEntry) {
// Filter out Zimit files that we cannot handle without error // Filter out Zimit files that we cannot handle without error
if (that.zimType === 'zimit') dirEntry = transformZimit.filterReplayFiles(dirEntry); if (that.zimType === 'zimit' && !appstate.isReplayWorkerAvailable) dirEntry = transformZimit.filterReplayFiles(dirEntry);
callback(dirEntry); callback(dirEntry);
}); });
} }
@ -646,7 +652,7 @@ ZIMArchive.prototype.callLibzimWorker = function (parameters) {
ZIMArchive.prototype.resolveRedirect = function (dirEntry, callback) { ZIMArchive.prototype.resolveRedirect = function (dirEntry, callback) {
var that = this; var that = this;
this.file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) { this.file.dirEntryByUrlIndex(dirEntry.redirectTarget).then(function (resolvedDirEntry) {
if (that.zimType === 'zimit') resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry); if (that.zimType === 'zimit' && !appstate.isReplayWorkerAvailable) resolvedDirEntry = transformZimit.filterReplayFiles(resolvedDirEntry);
callback(resolvedDirEntry); callback(resolvedDirEntry);
}); });
}; };
@ -662,18 +668,25 @@ ZIMArchive.prototype.resolveRedirect = function (dirEntry, callback) {
* @param {callbackStringContent} callback * @param {callbackStringContent} callback
*/ */
ZIMArchive.prototype.readUtf8File = function (dirEntry, callback) { ZIMArchive.prototype.readUtf8File = function (dirEntry, callback) {
if (params.isLandingPage && appstate.selectedArchive.zimType === 'zimit' && !appstate.isReplayWorkerAvailable && dirEntry.namespace !== 'M') {
// Mark the directory entry as a redirect
dirEntry.zimitRedirect = this.zimitStartPage;
// Prevent reload loop!
params.isLandingPage = false;
}
var that = this || appstate.selectedArchive;
if (!dirEntry) { if (!dirEntry) {
console.warn('No directory entry found for requested URL!'); console.warn('No directory entry found for requested URL!');
return callback(dirEntry, ''); return callback(dirEntry, '');
} }
var cns = appstate.selectedArchive.getContentNamespace(); var cns = that.getContentNamespace();
return dirEntry.readData().then(function (data) { return dirEntry.readData().then(function (data) {
var mimetype = dirEntry.getMimetype(); var mimetype = dirEntry.getMimetype();
if (window.TextDecoder) { var html = that.getUtf8FromData(data);
data = new TextDecoder('utf-8').decode(data); // Bypass everything if we're using Replay Worker
} else { if (appstate.isReplayWorkerAvailable) {
// Support for IE11 and Edge Legacy - only support UTF-8 decoding callback(dirEntry, html);
data = utf8.parse(data); return;
} }
if (/\bx?html\b/i.test(mimetype)) { if (/\bx?html\b/i.test(mimetype)) {
// If the data were encoded with a different mimtype, here is how to change it // If the data were encoded with a different mimtype, here is how to change it
@ -682,11 +695,11 @@ ZIMArchive.prototype.readUtf8File = function (dirEntry, callback) {
// if (encoding && !/utf-8/i.test(encoding)) decData = new TextDecoder(encoding).decode(data); // if (encoding && !/utf-8/i.test(encoding)) decData = new TextDecoder(encoding).decode(data);
// Some Zimit assets have moved location and we need to follow the moved permanently data // Some Zimit assets have moved location and we need to follow the moved permanently data
if (/301\s*moved\s+permanently/i.test(data)) dirEntry = transformZimit.getZimitRedirect(dirEntry, data, cns); if (/301\s*moved\s+permanently/i.test(html)) dirEntry = transformZimit.getZimitRedirect(dirEntry, html, cns);
// Some Zimit archives have an incorrect meta charset tag. See https://github.com/openzim/warc2zim/issues/88. // Some Zimit archives have an incorrect meta charset tag. See https://github.com/openzim/warc2zim/issues/88.
// So we remove it! // So we remove it!
data = data.replace(/<meta\b[^>]+?Content-Type[^>]+?charset=([^'"\s]+)[^>]+>\s*/i, function (m0, m1) { html = html.replace(/<meta\b[^>]+?Content-Type[^>]+?charset=([^'"\s]+)[^>]+>\s*/i, function (m0, m1) {
if (!/utf-8/i.test(m1)) { if (!/utf-8/i.test(m1)) {
return ''; return '';
} }
@ -694,10 +707,10 @@ ZIMArchive.prototype.readUtf8File = function (dirEntry, callback) {
}); });
} }
if (dirEntry.inspect || dirEntry.zimitRedirect) { if (dirEntry.inspect || dirEntry.zimitRedirect) {
if (dirEntry.inspect) dirEntry = transformZimit.getZimitRedirect(dirEntry, data, cns); if (dirEntry.inspect) dirEntry = transformZimit.getZimitRedirect(dirEntry, html, cns);
if (dirEntry.zimitRedirect) { if (dirEntry.zimitRedirect) {
return appstate.selectedArchive.getDirEntryByPath(dirEntry.zimitRedirect).then(function (rd) { return that.getDirEntryByPath(dirEntry.zimitRedirect).then(function (rd) {
return appstate.selectedArchive.readUtf8File(rd, callback); return that.readUtf8File(rd, callback);
}); });
} }
} else { } else {
@ -706,9 +719,9 @@ ZIMArchive.prototype.readUtf8File = function (dirEntry, callback) {
if (params.zimType === 'zimit' && /\/(?:x?html|css|javascript)\b/i.test(mimetype) && if (params.zimType === 'zimit' && /\/(?:x?html|css|javascript)\b/i.test(mimetype) &&
// DEV: We do not want to transform CSS and JS files that the user wishes to inspect the contents of // DEV: We do not want to transform CSS and JS files that the user wishes to inspect the contents of
!(dirEntry.fromArticleList && /\/(?:css|javascript)\b/i.test(mimetype))) { !(dirEntry.fromArticleList && /\/(?:css|javascript)\b/i.test(mimetype))) {
data = transformZimit.transformReplayUrls(dirEntry, data, mimetype); html = transformZimit.transformReplayUrls(dirEntry, html, mimetype);
} }
callback(dirEntry, data); callback(dirEntry, html);
} }
}).catch(function (e) { }).catch(function (e) {
console.error('Error reading directory entry', e); console.error('Error reading directory entry', e);
@ -732,6 +745,11 @@ ZIMArchive.prototype.readBinaryFile = function (dirEntry, callback) {
return callback(dirEntry, ''); return callback(dirEntry, '');
} }
return dirEntry.readData().then(function (data) { return dirEntry.readData().then(function (data) {
// Bypass everything if we're using Replay Worker
if (appstate.selectedArchive.zimType === 'zimit' && appstate.isReplayWorkerAvailable) {
callback(dirEntry, data);
return;
}
var mimetype = dirEntry.getMimetype(); var mimetype = dirEntry.getMimetype();
if (dirEntry.inspect) { if (dirEntry.inspect) {
dirEntry = transformZimit.getZimitRedirect(dirEntry, utf8.parse(data), appstate.selectedArchive.getContentNamespace()); dirEntry = transformZimit.getZimitRedirect(dirEntry, utf8.parse(data), appstate.selectedArchive.getContentNamespace());
@ -753,6 +771,22 @@ ZIMArchive.prototype.readBinaryFile = function (dirEntry, callback) {
}); });
}; };
/**
 * Decodes binary data into a string, treating the bytes as UTF-8
 * @param {ArrayBuffer|ArrayBufferView} data Binary content to decode. TextDecoder.decode() accepts a
 *     buffer source (e.g. a Uint8Array), not a Blob; presumably this is the value produced by
 *     dirEntry.readData() - TODO confirm against callers
 * @returns {String} UTF-8 string
 */
ZIMArchive.prototype.getUtf8FromData = function (data) {
    // Feature-detect with typeof instead of reading window.TextDecoder, so that the check itself
    // cannot throw a ReferenceError in a context that has no window global
    if (typeof TextDecoder !== 'undefined') {
        return new TextDecoder('utf-8').decode(data);
    }
    // Support for IE11 and Edge Legacy - only support UTF-8 decoding
    return utf8.parse(data);
};
/** /**
* Searches the URL pointer list of Directory Entries by pathname * Searches the URL pointer list of Directory Entries by pathname
* @param {String} path The pathname of the DirEntry that is required (namespace + filename) * @param {String} path The pathname of the DirEntry that is required (namespace + filename)
@ -762,17 +796,19 @@ ZIMArchive.prototype.readBinaryFile = function (dirEntry, callback) {
*/ */
ZIMArchive.prototype.getDirEntryByPath = function (path, zimitResolving, originalPath) { ZIMArchive.prototype.getDirEntryByPath = function (path, zimitResolving, originalPath) {
var that = this; var that = this;
if (originalPath) appstate.originalPath = originalPath; if (that.zimType === 'zimit' && !appstate.isReplayWorkerAvailable) {
path = path.replace(/\?kiwix-display/, ''); if (originalPath) appstate.originalPath = originalPath;
// Correct obvious errors path = path.replace(/\?kiwix-display/, '');
if (!originalPath) { // Correct obvious errors
var revisedPath = path.replace(/.*?((?:C\/A|A)\/(?!.*(?:C\/A|A)).+)$/, '$1'); if (!originalPath) {
if (revisedPath !== path) { var revisedPath = path.replace(/.*?((?:C\/A|A)\/(?!.*(?:C\/A|A)).+)$/, '$1');
console.warn('*** Revised path from ' + path + '\nto: ' + revisedPath + ' ***'); if (revisedPath !== path) {
if (appstate.selectedArchive.zimType === 'zimit') { console.warn('*** Revised path from ' + path + '\nto: ' + revisedPath + ' ***');
console.debug('*** DEV: Consider correcting this error in tranformZimit.js ***'); if (appstate.selectedArchive.zimType === 'zimit') {
console.debug('*** DEV: Consider correcting this error in tranformZimit.js ***');
}
path = revisedPath;
} }
path = revisedPath;
} }
} }
return util.binarySearch(0, this.file.entryCount, function (i) { return util.binarySearch(0, this.file.entryCount, function (i) {
@ -791,10 +827,10 @@ ZIMArchive.prototype.getDirEntryByPath = function (path, zimitResolving, origina
return that.file.dirEntryByUrlIndex(index); return that.file.dirEntryByUrlIndex(index);
}).then(function (dirEntry) { }).then(function (dirEntry) {
// Filter Zimit dirEntries and do some initial transforms // Filter Zimit dirEntries and do some initial transforms
if (that.zimType === 'zimit') { if (that.zimType === 'zimit' && !appstate.isReplayWorkerAvailable) {
dirEntry = transformZimit.filterReplayFiles(dirEntry); dirEntry = transformZimit.filterReplayFiles(dirEntry);
} }
if (!dirEntry) { if (!dirEntry && !appstate.isReplayWorkerAvailable) {
// We couldn't get the dirEntry, so look it up the Zimit header // We couldn't get the dirEntry, so look it up the Zimit header
if (!zimitResolving && that.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) { if (!zimitResolving && that.zimType === 'zimit' && !/^(H|C\/H)\//.test(path) && path !== appstate.originalPath) {
// We need to look the file up in the Header namespace (double replacement ensures both types of ZIM are supported) // We need to look the file up in the Header namespace (double replacement ensures both types of ZIM are supported)
@ -915,6 +951,41 @@ ZIMArchive.prototype.addMetadataToZIMFile = function (key) {
}); });
}; };
/**
 * Sets the Zimit metadata for the archive.
 *
 * Reads the archive's main page (following a single redirect if the main page is itself a redirect),
 * looks for a `window.mainUrl = '...'` assignment in its text, and if one is found stores
 * `zimitPrefix`, `zimitStartPage` and `zimitPseudoContentNamespace` on this archive object.
 * Any error raised along the way is logged as a warning rather than propagated.
 * @returns {Promise} A Promise that settles once the lookup has finished; it carries no value
 */
ZIMArchive.prototype.setZimitMetadata = function () {
    var archive = this;
    return this.file.dirEntryByUrlIndex(this.file.mainPage).then(function (landingEntry) {
        // If the landing page is a redirect, we need to find the target (one level only)
        if (landingEntry.redirect) {
            return archive.file.dirEntryByUrlIndex(landingEntry.redirectTarget);
        }
        return landingEntry;
    }).then(function (targetEntry) {
        return targetEntry.readData().then(function (data) {
            var pageText = archive.getUtf8FromData(data);
            // Capture groups: 1 = quote character, 2 = host, 3 = remainder of the URL up to the closing quote
            var mainUrlMatch = pageText.match(/window\.mainUrl\s*=\s*(['"])https?:\/\/([^/]+)(.+?)\1/);
            if (!mainUrlMatch || !mainUrlMatch[2] || !mainUrlMatch[3]) {
                // Nothing usable on the landing page: leave the archive object untouched
                return;
            }
            var entryNamespace = targetEntry.namespace;
            // Logic added to distinguish between Type 0 and Type 1 Zimit ZIMs:
            // entries in the 'C' namespace get an extra 'A/' path segment
            var articleSegment = entryNamespace === 'C' ? 'A/' : '';
            var relativeZimitPrefix = articleSegment + mainUrlMatch[2];
            // Store a full Zimit prefix in the archive object
            archive.zimitPrefix = relativeZimitPrefix + '/';
            archive.zimitStartPage = entryNamespace + '/' + relativeZimitPrefix + mainUrlMatch[3];
            archive.zimitPseudoContentNamespace = entryNamespace + '/' + articleSegment;
        });
    }).catch(function (e) {
        console.warn('Zimit metadata not found in this archive!', e);
    });
};
export default { export default {
ZIMArchive: ZIMArchive ZIMArchive: ZIMArchive
}; };

96
www/topFrame.html Normal file
View File

@ -0,0 +1,96 @@
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="Content-Security-Policy" content="default-src 'self' data: file: blob: about: 'unsafe-inline' 'unsafe-eval'; frame-src 'self' moz-extension: chrome-extension:; object-src 'none';">
<meta charset="utf-8"/>
<style>
html, body
{
position: fixed;
top: 0;
left: 0;
bottom: 0;
right: 0;
margin: 0;
padding: 0;
border: 0;
overflow: hidden;
}
iframe {
width: 100%;
height: 100%;
overflow: scroll;
}
</style>
</head>
<body style="margin: 0; padding: 0;">
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe>
<script>
// Top-frame bootstrap: loads the replayed page into the iframe and mirrors the iframe's
// URL, title and favicons onto this outer document whenever a message reports a change.
// "$PREFIX" and "$URL" are placeholders; presumably they are substituted before this page
// is served - TODO confirm where the substitution happens.
const prefix = "$PREFIX";
const startUrl = "$URL";
const iframe = document.querySelector("iframe");

// update URL when iframe changes
// The event is taken as an explicit parameter instead of relying on the deprecated
// global window.event
window.addEventListener("message", function (event) {
    const data = event.data;
    // Ignore messages whose payload is empty or is not one of the two handled types,
    // so that an unrelated postMessage cannot make the handler throw
    if (!data || (data.wb_type !== "load" && data.wb_type !== "replace-url")) {
        return;
    }

    if (data.title) {
        document.title = data.title;
    }

    // remove scheme to be consistent with current canonicalization
    const pageUrlNoScheme = data.url.slice(data.url.indexOf("//") + 2);
    window.history.replaceState(null, "", prefix + pageUrlNoScheme);

    // if icons received, replace any existing icons with new ones
    if (!data.icons) {
        return;
    }

    const head = document.querySelector('head');
    const oldLinks = document.querySelectorAll("link[rel*='icon']");
    for (const link of oldLinks) {
        head.removeChild(link);
    }

    let icons = data.icons;
    // attempt to load the default "<origin>/favicon.ico" if no other favicon is specified
    if (!icons.length) {
        icons = [{
            "href": prefix + "mp_/" + new URL("/favicon.ico", data.url),
            "rel": "icon"
        }];
    }

    for (const icon of icons) {
        const parts = icon.href.split("/mp_/", 2);
        // probably an invalid URL
        if (parts.length < 2) {
            continue;
        }

        const iconUrl = parts[1];
        const iconUrlNoScheme = iconUrl.slice(iconUrl.indexOf("//") + 2);

        // % encode the entire string
        // NOTE(review): the original first assigned encodeURI(...) and then immediately
        // overwrote it with encodeURIComponent(...) of the same input, so only the latter
        // ever took effect. That effective behaviour is kept here; confirm whether
        // double-encoding was actually intended.
        const encodedUrl = encodeURIComponent(iconUrlNoScheme);

        const link = document.createElement('link');
        link.rel = icon.rel;
        link.href = parts[0] + "/" + encodedUrl;
        head.appendChild(link);
    }
});

iframe.src = prefix + "mp_/" + startUrl;
</script>
</body>
</html>