diff --git a/service-worker.js b/service-worker.js
index 7e993034..ee5f8318 100644
--- a/service-worker.js
+++ b/service-worker.js
@@ -384,9 +384,12 @@ function fetchRequestFromZIM(fetchEvent) {
var prefix = regexpResult[1];
nameSpace = regexpResult[2];
title = regexpResult[3];
+ var anchorTarget = fetchEvent.request.url.match(/#([^#;]*)$/);
+ anchorTarget = anchorTarget ? anchorTarget[1] : '';
- // We need to remove the potential parameters in the URL
- title = removeUrlParameters(decodeURIComponent(title));
+ // We need to remove the potential parameters in the URL. Note that titles may contain question marks or hashes, so we strip the
+ // parameters from the still-encoded URI and only then decode it. Be sure that no querystring has been encoded along with the URL, e.g. for clicked links.
+ title = decodeURIComponent(removeUrlParameters(title));
titleWithNameSpace = nameSpace + '/' + title;
@@ -432,7 +435,8 @@ function fetchRequestFromZIM(fetchEvent) {
};
outgoingMessagePort.postMessage({
'action': 'askForContent',
- 'title': titleWithNameSpace
+ 'title': titleWithNameSpace,
+ 'anchorTarget': anchorTarget
}, [messageChannel.port2]);
});
}
@@ -443,7 +447,13 @@ function fetchRequestFromZIM(fetchEvent) {
* @returns {String} The same URL without its parameters and anchors
*/
function removeUrlParameters(url) {
- return url.replace(/([^?#]+)[?#].*$/, '$1');
+ // Remove any querystring (everything from the last ? to the end of the string)
+ var strippedUrl = url.replace(/\?[^?]*$/, '');
+ // Remove any anchor parameters - note that IN PRACTICE anchor parameters cannot contain a semicolon because JavaScript maintains
+ // compatibility with HTML4, so we can avoid accidentally stripping e.g. &#39; by excluding an anchor if any semicolon is found
+ // between it and the end of the string. See https://stackoverflow.com/a/79022/9727685.
+ strippedUrl = strippedUrl.replace(/#[^#;]*$/, '');
+ return strippedUrl;
}
/**
diff --git a/www/js/app.js b/www/js/app.js
index 3e05fe63..ad6fc5a2 100644
--- a/www/js/app.js
+++ b/www/js/app.js
@@ -243,7 +243,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
if (/Enter/.test(e.key)) {
if (activeElement.classList.contains('hover')) {
var dirEntryId = activeElement.getAttribute('dirEntryId');
- findDirEntryFromDirEntryIdAndLaunchArticleRead(dirEntryId);
+ findDirEntryFromDirEntryIdAndLaunchArticleRead(decodeURIComponent(dirEntryId));
return;
}
}
@@ -3235,8 +3235,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
var newHtml = "";
for (var i = 0; i < dirEntryArray.length; i++) {
var dirEntry = dirEntryArray[i];
- newHtml += "\n" + (dirEntry.getTitleOrUrl()) + "";
+ // NB Ensure you use double quotes for HTML attributes below - see comment in populateListOfArticles
+ newHtml += '\n' + dirEntry.getTitleOrUrl() + '';
}
start = start ? start : 0;
var back = start ? '' + dirEntry.getTitleOrUrl() + '';
}
@@ -3388,11 +3393,11 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
/**
* Handles the click on the title of an article in search results
- * @param {Event} event
- * @returns {Boolean}
+ * @param {Event} event The click event to handle
+ * @returns {Boolean} Always returns false for JQuery event handling
*/
function handleTitleClick(event) {
- var dirEntryId = event.currentTarget.getAttribute("dirEntryId");
+ var dirEntryId = decodeURIComponent(event.target.getAttribute('dirEntryId')); // NOTE(review): event.target may be a child node lacking dirEntryId (getAttribute gives null, decoded to the string "null") - confirm vs. event.currentTarget
findDirEntryFromDirEntryIdAndLaunchArticleRead(dirEntryId);
return false;
}
@@ -3400,7 +3405,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
/**
* Creates an instance of DirEntry from given dirEntryId (including resolving redirects),
* and call the function to read the corresponding article
- * @param {String} dirEntryId
+ * @param {String} dirEntryId The stringified Directory Entry to parse and launch
*/
function findDirEntryFromDirEntryIdAndLaunchArticleRead(dirEntryId) {
if (appstate.selectedArchive.isReady()) {
@@ -3518,10 +3523,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
} else {
goToRetrievedContent(htmlContent);
}
-
}
}
-
}
var loaded = false;
@@ -3577,6 +3580,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
uiUtil.clearSpinner();
// If we reloaded the page to print the desktop style, we need to return to the printIntercept dialogue
if (params.printIntercept) printIntercept();
+ // Jump to any anchor parameter
+ if (anchorParameter) {
+ var target = articleWindow.document.getElementById(anchorParameter);
+ if (target) setTimeout(function () {
+ target.scrollIntoView();
+ }, 1000);
+ anchorParameter = '';
+ }
params.isLandingPage = false;
} else {
loaded = false;
@@ -3610,6 +3621,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
if (event.data.action === "askForContent") {
// The ServiceWorker asks for some content
var title = event.data.title;
+ if (!anchorParameter && event.data.anchorTarget) anchorParameter = event.data.anchorTarget;
var messagePort = event.ports[0];
var readFile = function (dirEntry) {
if (dirEntry === null) {
@@ -3808,6 +3820,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// This matches the data-kiwixurl of all tags containing rel="stylesheet" in raw HTML unless commented out
var regexpSheetHref = /(]*rel\s*=\s*["']stylesheet)[^>]*(?:href|data-kiwixurl)\s*=\s*["'])([^"']+)(["'][^>]*>)(?!\s*--\s*>)/ig;
+ // A string to hold any anchor parameter in clicked ZIM URLs (as we must strip these to find the article in the ZIM)
+ var anchorParameter;
+
params.containsMathTexRaw = false;
params.containsMathTex = false;
params.containsMathSVG = false;
@@ -4370,6 +4385,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// For Chromium browsers a small delay greatly improves composition
setTimeout(showArticle, 80);
}
+ // Jump to any anchor parameter
+ if (anchorParameter) {
+ var target = articleWindow.document.getElementById(anchorParameter);
+ if (target) setTimeout(function () {
+ target.scrollIntoView();
+ }, 1000);
+ anchorParameter = '';
+ }
params.isLandingPage = false;
};
@@ -4618,7 +4641,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// NB dirEntry.url can also contain path separator / in some ZIMs (Stackexchange). } and ] do not need to be escaped as they have no meaning on their own.
var escapedUrl = encodeURIComponent(dirEntry.url).replace(/([\\$^.|?*+/()[{])/g, '\\$1');
// Pattern to match a local anchor in an href even if prefixed by escaped url; will also match # on its own
- var regexpLocalAnchorHref = new RegExp('^(?:#|' + escapedUrl + '#)([^#]*$)');
+ // Note that we exclude any # with a semicolon between it and the end of the string, to avoid accidentally matching e.g. &#39;
+ var regexpLocalAnchorHref = new RegExp('^(?:#|' + escapedUrl + '#)([^#;]*$)');
Array.prototype.slice.call(articleDocument.querySelectorAll('a, area')).forEach(function (anchor) {
// Attempts to access any properties of 'this' with malformed URLs causes app crash in Edge/UWP [kiwix-js #430]
try {
@@ -4629,12 +4653,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
var href = anchor.getAttribute('href');
if (href === null || href === undefined || /^javascript:/i.test(anchor.protocol)) return;
+ var anchorTarget = href.match(regexpLocalAnchorHref);
if (href.length === 0) {
// It's a link with an empty href, pointing to the current page: do nothing.
- } else if (regexpLocalAnchorHref.test(href)) {
+ } else if (anchorTarget) {
// It's a local anchor link : remove escapedUrl if any (see above)
- anchor.setAttribute('href', href.replace(/^[^#]*/, ''));
- } else if (anchor.protocol !== currentProtocol || anchor.host !== currentHost) {
+ anchor.setAttribute('href', '#' + anchorTarget[1]);
+ } else if (anchor.protocol !== currentProtocol ||
+ anchor.host !== currentHost) {
// It's an external URL : we should open it in a new tab
anchor.target = '_blank';
if (anchor.protocol === 'bingmaps:') {
@@ -4722,6 +4748,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
e.preventDefault();
e.stopPropagation();
+ anchorParameter = href.match(/#([^#;]+)$/);
+ anchorParameter = anchorParameter ? anchorParameter[1] : '';
var zimUrl = uiUtil.deriveZimUrlFromRelativeUrl(uriComponent, baseUrl);
goToArticle(zimUrl, downloadAttrValue, contentType);
setTimeout(reset, 1400);
diff --git a/www/js/lib/uiUtil.js b/www/js/lib/uiUtil.js
index a4368fb6..b4fc2fb5 100644
--- a/www/js/lib/uiUtil.js
+++ b/www/js/lib/uiUtil.js
@@ -635,10 +635,16 @@ define(rqDef, function(util) {
}
/**
+ * DEV: This function is no longer used in the project and could be removed unless it is of historical interest.
+ * It has been superseded by encodeURIComponent which encodes all characters except A-Z a-z 0-9 - _ . ! ~ * ' ( )
+ * The only character from below that is not encoded is apostrophe ('), but this does not need to be encoded to
+ * show correctly in our UI, given that it is an allowed character in bare URIs and the dirEntryId is enclosed
+ * in double quote marks ("..."). This has been successfully tested on titles with apostrophes.
+ *
* Encodes the html escape characters in the string before using it as html class name,id etc.
*
* @param {String} string The string in which html characters are to be escaped
- *
+ * @returns {String} The escaped HTML string
*/
function htmlEscapeChars(string) {
var escapechars = {