More intelligent repositioning of hatnotes

Former-commit-id: ae3ef5abb48c89fc6bd22b7e9372b41509bfbd66 [formerly 829f822895da944cb89c5c6f45799c63cdb9eee3] [formerly 5a73c4da9e53913c48e79eb6263dc3d329d36fe0] [formerly 4d18e9326910def85ce1305704987a29537a9c76 [formerly 6ba055cac8a989778d63e337987968cf55c7d545 [formerly 916c73d1db2c7b399abcc73ee6a4c52e6d8feb6b]]]
Former-commit-id: 32b7231148bd7f3e4f3328a971917cd99531de10 [formerly 5cf1e79bb0be09c3caa55f50b76f1c322387a61c [formerly 995784f6a074b5fadf4591f49580d883c699ecdf]]
Former-commit-id: c6139da7ce267491507a0318d3eae64e4917b1aa [formerly 08806df458a2f221e7b9b6ac043e6d0332714b19]
Former-commit-id: 65f67d63fd4bbb2dd75ff52c654b6967421c169c
This commit is contained in:
Jaifroid 2021-11-18 11:55:29 +00:00
parent ec5c86c1c7
commit 243c1284ce
2 changed files with 16 additions and 11 deletions

View File

@ -3754,16 +3754,20 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'util', 'cache', 'images', 'sett
var hatnote;
var hatnotes = [];
do {
hatnote = htmlArticle.match(/<h1\b(?:[^<]|<(?!h2))+?((?:<div\s+[^>]+\b(?:hatnote|homonymie|dablink)\b[\s\S]+?<\/div>\s*)+)/i);
if (hatnote) {
htmlArticle = htmlArticle.replace(hatnote[1], '');
hatnotes.push(hatnote[1]);
hatnote = util.matchOuter(htmlArticle, '<div\\b[^>]+\\b(?:hatnote|homonymie|dablink)\\b', '</div>\\s*', 'i');
if (hatnote && hatnote.length) {
// Ensure the next matching hatnote is under h1
if (/(?:<h1\b(?:[^<]|<(?!h2))+?)<div\s+[^>]+\bhatnote|homonymie|dablink\b/i.test(htmlArticle)) {
htmlArticle = htmlArticle.replace(hatnote[0], '');
hatnotes.push(hatnote[0]);
} else {
break;
}
}
} while (hatnote);
if (hatnotes.length) {
hatnotes.forEach(function (hnt) {
htmlArticle = htmlArticle.replace(/(<\/h1>\s*)/i, "$1" + hnt.replace(/(<div\s+)/i, '$1style="padding-top:10px;" '));
});
} while (hatnote.length);
// Ensure we replace them in the right order
for (var i = hatnotes.length; i--;) {
htmlArticle = htmlArticle.replace(/(<\/h1>\s*)/i, "$1" + hatnotes[i].replace(/(<div\s+)/i, '$1style="padding-top:10px;" '));
}
// Put misplaced disambiguation header back in its correct position @TODO remove this when fixed in mw-offliner
var noexcerpt = htmlArticle.match(/<h1\b(?:[^<]|<(?!h2))+?(<dl\b(?:[^<]|<(?!\/dl>)){1,50}?(?:For\sother\splaces\swith\sthe\ssame\sname|Not\sto\sbe\sconfused\swith|mw-redirect[^<]+travel\stopic|This\sarticle\sis\sa|See\salso:)(?:[^<]|<(?!\/dl>))+<\/dl>\s*)/i);

View File

@ -408,16 +408,17 @@ define([], function() {
* @param {string} left - Regex string of opening pattern to match, e.g. '<table\\b[^>]*>' matches <table> or <table ...>.
* @param {string} right - Regex string of closing pattern to match, e.g. '</table>'. Must not be equal to left.
* @param {string} flags - Regex flags, if any, such as 'gi' (= match globally and case insensitive).
* @param {string} prefix - An optional Regex string that must be present before <left> for the regex to match
* @returns {Array} An array of matches.
*/
function matchOuter(str, left, right, flags) {
function matchOuter(str, left, right, flags, prefix) {
flags = flags || "";
var f = flags.replace(/g/g, ""),
g = flags.indexOf("g") > -1,
l = new RegExp(left, f),
//Creates a neutral middle value if left is a well-formed regex for an html tag with attributes
mid = /^(<[^\\]+)\\/.test(left) ? left.replace(/^(<[^\\]+)[\S\s]+$/, "$1") + '\\b[^>]*>' : "",
x = new RegExp((mid ? mid : left) + "|" + right, "g" + f),
x = new RegExp((prefix ? prefix : '') + (mid ? mid : left) + "|" + right, "g" + f),
a = [],
t, s, m;
mid = mid ? new RegExp(mid, f) : l;