Fix incorrect processing of titles with question marks and anchor targets

Former-commit-id: 730b34cba56c719c842db89d32a231194c1936d2 [formerly cc5f175db052d5b5512fce257badce491dfda233] [formerly 4a24b19117e863c5c04e5af97abba97b38a21d2f] [formerly 2e4e0285ee487627769efde82d61899cb0d358bc [formerly bd0d782b89383d0710fcc6d716c99230e4695bc8 [formerly 6edc8b8f1085c0acb4c42a0346df97d9e9397242]]]
Former-commit-id: 0be596852e443c034a7004a42165a9cef16c304d [formerly a5d9b6b27ab5eab1d53240ad810a123cb3212954 [formerly 135c996f1316447cf2983dfa85240c50a07e879b]]
Former-commit-id: c68dd2c2e2e02522b86e42b5acf74ac2294f7425 [formerly cf5c52b09c85567422e34fc84a891311f709e215]
Former-commit-id: 014c13a8f0da8a3a1101d334e7464387bbf190e4
This commit is contained in:
Jaifroid 2022-01-23 15:10:51 +00:00
parent 3893c84838
commit 417620813a
3 changed files with 63 additions and 19 deletions

View File

@ -384,9 +384,12 @@ function fetchRequestFromZIM(fetchEvent) {
var prefix = regexpResult[1];
nameSpace = regexpResult[2];
title = regexpResult[3];
var anchorTarget = fetchEvent.request.url.match(/#([^#;]*)$/);
anchorTarget = anchorTarget ? anchorTarget[1] : '';
// We need to remove the potential parameters in the URL
title = removeUrlParameters(decodeURIComponent(title));
// We need to remove the potential parameters in the URL. Note that titles may contain question marks or hashes, so we test the
// encoded URI before decoding it. Be sure that you haven't encoded any querystring along with the URL, e.g. for clicked links.
title = decodeURIComponent(removeUrlParameters(title));
titleWithNameSpace = nameSpace + '/' + title;
@ -432,7 +435,8 @@ function fetchRequestFromZIM(fetchEvent) {
};
outgoingMessagePort.postMessage({
'action': 'askForContent',
'title': titleWithNameSpace
'title': titleWithNameSpace,
'anchorTarget': anchorTarget
}, [messageChannel.port2]);
});
}
@ -443,7 +447,13 @@ function fetchRequestFromZIM(fetchEvent) {
* @returns {String} The same URL without its parameters and anchors
*/
function removeUrlParameters(url) {
    // Callers are expected to pass the still-encoded URL: a question mark or hash that belongs to the title
    // is then percent-encoded and is not mistaken for a querystring or anchor delimiter.
    // Remove any querystring: everything from the last question mark to the end of the string
    var strippedUrl = url.replace(/\?[^?]*$/, '');
    // Remove any anchor parameter - note that IN PRACTICE anchor parameters cannot contain a semicolon because JavaScript
    // maintains compatibility with HTML4, so we can avoid accidentally stripping e.g. &#39; by excluding an anchor if any
    // semicolon is found between it and the end of the string. See https://stackoverflow.com/a/79022/9727685.
    strippedUrl = strippedUrl.replace(/#[^#;]*$/, '');
    return strippedUrl;
}
/**

View File

@ -243,7 +243,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
if (/Enter/.test(e.key)) {
if (activeElement.classList.contains('hover')) {
var dirEntryId = activeElement.getAttribute('dirEntryId');
findDirEntryFromDirEntryIdAndLaunchArticleRead(dirEntryId);
findDirEntryFromDirEntryIdAndLaunchArticleRead(decodeURIComponent(dirEntryId));
return;
}
}
@ -3235,8 +3235,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
var newHtml = "";
for (var i = 0; i < dirEntryArray.length; i++) {
var dirEntry = dirEntryArray[i];
newHtml += "\n<a class='list-group-item' href='#' dirEntryId='" + dirEntry.toStringId().replace(/'/g, "&apos;") +
"'>" + (dirEntry.getTitleOrUrl()) + "</a>";
// NB Ensure you use double quotes for HTML attributes below - see comment in populateListOfArticles
newHtml += '\n<a class="list-group-item" href="#" dirEntryId="' + encodeURIComponent(dirEntry.toStringId()) +
'">' + dirEntry.getTitleOrUrl() + '</a>';
}
start = start ? start : 0;
var back = start ? '<a href="#" data-start="' + (start - params.maxSearchResultsSize) +
@ -3362,7 +3363,11 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
var listLength = dirEntryArray.length < params.maxSearchResultsSize ? dirEntryArray.length : params.maxSearchResultsSize;
for (var i = 0; i < listLength; i++) {
var dirEntry = dirEntryArray[i];
var dirEntryStringId = uiUtil.htmlEscapeChars(dirEntry.toStringId());
// NB We use encodeURIComponent rather than encodeURI here because we know that any question marks in the title are not querystrings,
// and should be encoded [kiwix-js #806]. DEV: be very careful if you edit the dirEntryId attribute below, because the contents must be
// inside double quotes (in the final HTML string), given that dirEntryStringId may contain bare apostrophes
// Info: encodeURIComponent encodes all characters except A-Z a-z 0-9 - _ . ! ~ * ' ( )
var dirEntryStringId = encodeURIComponent(dirEntry.toStringId());
articleListDivHtml += '<a href="#" dirEntryId="' + dirEntryStringId +
'" class="list-group-item">' + dirEntry.getTitleOrUrl() + '</a>';
}
@ -3388,11 +3393,11 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
/**
 * Handles the click on the title of an article in search results
 * @param {Event} event The click event to handle
 * @returns {Boolean} Always returns false for JQuery event handling
 */
function handleTitleClick(event) {
    // The dirEntryId attribute is stored URI-encoded, so it must be decoded before it is used.
    // Look on the clicked element first, then on the element the handler is bound to.
    var sources = [event.target, event.currentTarget];
    var encodedDirEntryId = null;
    for (var i = 0; i < sources.length; i++) {
        var source = sources[i];
        if (!source || typeof source.getAttribute !== 'function') continue;
        encodedDirEntryId = source.getAttribute('dirEntryId');
        if (encodedDirEntryId !== null && encodedDirEntryId !== undefined) break;
    }
    // With no id there is nothing to open: decodeURIComponent(null) would otherwise produce the string "null"
    if (encodedDirEntryId === null || encodedDirEntryId === undefined) return false;
    findDirEntryFromDirEntryIdAndLaunchArticleRead(decodeURIComponent(encodedDirEntryId));
    return false;
}
@ -3400,7 +3405,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
/**
* Creates an instance of DirEntry from given dirEntryId (including resolving redirects),
* and call the function to read the corresponding article
* @param {String} dirEntryId
* @param {String} dirEntryId The stringified Directory Entry to parse and launch
*/
function findDirEntryFromDirEntryIdAndLaunchArticleRead(dirEntryId) {
if (appstate.selectedArchive.isReady()) {
@ -3518,10 +3523,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
} else {
goToRetrievedContent(htmlContent);
}
}
}
}
var loaded = false;
@ -3577,6 +3580,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
uiUtil.clearSpinner();
// If we reloaded the page to print the desktop style, we need to return to the printIntercept dialogue
if (params.printIntercept) printIntercept();
// Jump to any anchor parameter
if (anchorParameter) {
var target = articleWindow.document.getElementById(anchorParameter);
if (target) setTimeout(function () {
target.scrollIntoView();
}, 1000);
anchorParameter = '';
}
params.isLandingPage = false;
} else {
loaded = false;
@ -3610,6 +3621,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
if (event.data.action === "askForContent") {
// The ServiceWorker asks for some content
var title = event.data.title;
if (!anchorParameter && event.data.anchorTarget) anchorParameter = event.data.anchorTarget;
var messagePort = event.ports[0];
var readFile = function (dirEntry) {
if (dirEntry === null) {
@ -3808,6 +3820,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// This matches the data-kiwixurl of all <link> tags containing rel="stylesheet" in raw HTML unless commented out
var regexpSheetHref = /(<link\s+(?=[^>]*rel\s*=\s*["']stylesheet)[^>]*(?:href|data-kiwixurl)\s*=\s*["'])([^"']+)(["'][^>]*>)(?!\s*--\s*>)/ig;
// A string to hold any anchor parameter in clicked ZIM URLs (as we must strip these to find the article in the ZIM)
var anchorParameter;
params.containsMathTexRaw = false;
params.containsMathTex = false;
params.containsMathSVG = false;
@ -4370,6 +4385,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// For Chromium browsers a small delay greatly improves composition
setTimeout(showArticle, 80);
}
// Jump to any anchor parameter
if (anchorParameter) {
var target = articleWindow.document.getElementById(anchorParameter);
if (target) setTimeout(function () {
target.scrollIntoView();
}, 1000);
anchorParameter = '';
}
params.isLandingPage = false;
};
@ -4618,7 +4641,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
// NB dirEntry.url can also contain path separator / in some ZIMs (Stackexchange). } and ] do not need to be escaped as they have no meaning on their own.
var escapedUrl = encodeURIComponent(dirEntry.url).replace(/([\\$^.|?*+/()[{])/g, '\\$1');
// Pattern to match a local anchor in an href even if prefixed by escaped url; will also match # on its own
var regexpLocalAnchorHref = new RegExp('^(?:#|' + escapedUrl + '#)([^#]*$)');
// Note that we exclude any # with a semicolon between it and the end of the string, to avoid accidentally matching e.g. &#39;
var regexpLocalAnchorHref = new RegExp('^(?:#|' + escapedUrl + '#)([^#;]*$)');
Array.prototype.slice.call(articleDocument.querySelectorAll('a, area')).forEach(function (anchor) {
// Attempts to access any properties of 'this' with malformed URLs causes app crash in Edge/UWP [kiwix-js #430]
try {
@ -4629,12 +4653,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
var href = anchor.getAttribute('href');
if (href === null || href === undefined || /^javascript:/i.test(anchor.protocol)) return;
var anchorTarget = href.match(regexpLocalAnchorHref);
if (href.length === 0) {
// It's a link with an empty href, pointing to the current page: do nothing.
} else if (regexpLocalAnchorHref.test(href)) {
} else if (anchorTarget) {
// It's a local anchor link : remove escapedUrl if any (see above)
anchor.setAttribute('href', href.replace(/^[^#]*/, ''));
} else if (anchor.protocol !== currentProtocol || anchor.host !== currentHost) {
anchor.setAttribute('href', '#' + anchorTarget[1]);
} else if (anchor.protocol !== currentProtocol ||
anchor.host !== currentHost) {
// It's an external URL : we should open it in a new tab
anchor.target = '_blank';
if (anchor.protocol === 'bingmaps:') {
@ -4722,6 +4748,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
}
e.preventDefault();
e.stopPropagation();
anchorParameter = href.match(/#([^#;]+)$/);
anchorParameter = anchorParameter ? anchorParameter[1] : '';
var zimUrl = uiUtil.deriveZimUrlFromRelativeUrl(uriComponent, baseUrl);
goToArticle(zimUrl, downloadAttrValue, contentType);
setTimeout(reset, 1400);

View File

@ -635,10 +635,16 @@ define(rqDef, function(util) {
}
/**
* DEV: This function is no longer used in the project and could be removed unless it is of historical interest.
* It has been superseded by encodeURIComponent which encodes all characters except A-Z a-z 0-9 - _ . ! ~ * ' ( )
* The only character from below that is not encoded is apostrophe ('), but this does not need to be encoded to
* show correctly in our UI, given that it is an allowed character in bare URIs and the dirEntryId is enclosed
* in double quote marks ("..."). This has been successfully tested on titles with apostrophes.
*
* Encodes the html escape characters in the string before using it as html class name,id etc.
*
* @param {String} string The string in which html characters are to be escaped
*
* @returns {String} The escaped HTML string
*/
function htmlEscapeChars(string) {
var escapechars = {