/**
* util.js : Utility functions
*
* Copyright 2013-2014 Mossroy and contributors
* License GPL v3:
*
* This file is part of Kiwix.
*
* Kiwix is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Kiwix is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kiwix (file LICENSE-GPLv3.txt). If not, see <http://www.gnu.org/licenses/>
*/
'use strict';
define(['q'], function (q) {
/**
* Utility function : return true if the given string ends with the suffix
* @param {String} str
* @param {String} suffix
* @returns {Boolean}
*/
function endsWith(str, suffix) {
    // The only offset at which a trailing match can begin
    var searchFrom = str.length - suffix.length;
    var foundAt = str.indexOf(suffix, searchFrom);
    return foundAt !== -1;
}
/**
* Returns the same String with the first letter in upper-case
* @param {String} string
* @returns {String}
*/
function ucFirstLetter(string) {
    // Null, undefined and empty input are handed back untouched
    if (!string || !(string.length >= 1)) {
        return string;
    }
    var head = string[0].toLocaleUpperCase();
    var tail = string.slice(1);
    return head + tail;
}
/**
* Returns the same String with the first letter in lower-case
* @param {String} string
* @returns {String}
*/
function lcFirstLetter(string) {
    // Only a non-empty string has a first letter to convert
    var hasFirstChar = Boolean(string) && string.length >= 1;
    return hasFirstChar
        ? string.charAt(0).toLocaleLowerCase() + string.slice(1)
        : string;
}
/**
* Returns the same String with the first letter of every word in upper-case
* @param {String} string
* @returns {String}
*/
function ucEveryFirstLetter(string) {
    // Upper-cases the single word character captured at each word boundary
    function toUpper(firstChar) {
        return firstChar.toLocaleUpperCase();
    }
    if (!string) {
        return string;
    }
    return string.replace(/\b\w/g, toUpper);
}
/**
* Generates an array of DirEntry, where all duplicates (same title) have been removed
* (it also sorts them on their title)
*
* @param {Array.<DirEntry>} array of DirEntry
* @returns {Array.<DirEntry>} same array of DirEntry, without duplicates
*/
function removeDuplicateTitlesInDirEntryArray(array) {
    // Sort in place by title so that entries sharing a title become adjacent
    array.sort(function (dirEntryA, dirEntryB) {
        if (dirEntryA.title < dirEntryB.title) return -1;
        if (dirEntryA.title > dirEntryB.title) return 1;
        return 0;
    });
    // Compact the array in place in a single pass: "kept" is the number of unique
    // entries written so far. This replaces repeated splice() calls, each of which
    // shifts the whole tail and makes de-duplication quadratic.
    var kept = 0;
    for (var i = 0; i < array.length; i++) {
        // Keep only the first entry of each run of equal titles
        if (kept === 0 || array[kept - 1].title !== array[i].title) {
            array[kept++] = array[i];
        }
    }
    // Drop the now-unused tail; the caller's array object itself is returned
    array.length = kept;
    return array;
}
/**
* Generates an array of Strings, where all duplicates have been removed
* (without changing the order)
* It is optimized for small arrays.
* Source : http://codereview.stackexchange.com/questions/60128/removing-duplicates-from-an-array-quickly
*
* @param {Array} array of String
* @returns {Array} same array of Strings, without duplicates
*/
function removeDuplicateStringsInSmallArray(array) {
    // First occurrences, in their original order
    var firstSeen = [];
    for (var pos = 0; pos < array.length; pos++) {
        var candidate = array[pos];
        var alreadyKept = firstSeen.indexOf(candidate) >= 0;
        if (!alreadyKept) {
            firstSeen.push(candidate);
        }
    }
    return firstSeen;
}
/**
* Read an integer encoded in 4 bytes, little endian
* @param {Array} byteArray
* @param {Integer} firstIndex
* @returns {Integer}
*/
function readIntegerFrom4Bytes(byteArray, firstIndex) {
    // byteArray may be a view (e.g. the result of subarray()) that starts part-way
    // into its underlying buffer, so firstIndex must be offset by byteArray.byteOffset
    // for the DataView to address the bytes the caller actually means.
    var dataView = new DataView(byteArray.buffer, byteArray.byteOffset + firstIndex, 4);
    // "true" selects little-endian byte order
    return dataView.getUint32(0, true);
}
/**
* Read an integer encoded in 2 bytes, little endian
* @param {Array} byteArray
* @param {Integer} firstIndex
* @returns {Integer}
*/
function readIntegerFrom2Bytes(byteArray, firstIndex) {
    // byteArray may be a view (e.g. the result of subarray()) that starts part-way
    // into its underlying buffer, so firstIndex must be offset by byteArray.byteOffset
    // for the DataView to address the bytes the caller actually means.
    var dataView = new DataView(byteArray.buffer, byteArray.byteOffset + firstIndex, 2);
    // "true" selects little-endian byte order
    return dataView.getUint16(0, true);
}
/**
* Read a float encoded in 4 bytes
* @param {Array} byteArray
* @param {Integer} firstIndex
* @param {Boolean} littleEndian (optional)
* @returns {Float}
*/
function readFloatFrom4Bytes(byteArray, firstIndex, littleEndian) {
    // byteArray may be a view (e.g. the result of subarray()) that starts part-way
    // into its underlying buffer, so firstIndex must be offset by byteArray.byteOffset
    // for the DataView to address the bytes the caller actually means.
    var dataView = new DataView(byteArray.buffer, byteArray.byteOffset + firstIndex, 4);
    // When littleEndian is omitted (undefined), DataView reads big-endian
    return dataView.getFloat32(0, littleEndian);
}
/**
* Convert a Uint8Array to a lowercase hex string
* @param {Array} byteArray
* @returns {String}
*/
function uint8ArrayToHex(byteArray) {
    var alphabet = '0123456789abcdef';
    var digits = [];
    for (var pos = 0; pos < byteArray.length; pos++) {
        // Mask to one byte, then emit the high nibble followed by the low nibble
        var octet = byteArray[pos] & 0xff;
        digits.push(alphabet.charAt(octet >> 4));
        digits.push(alphabet.charAt(octet & 0xf));
    }
    return digits.join('');
}
/**
* Convert a Uint8Array to base64
* TODO : might be replaced by btoa() built-in function? https://developer.mozilla.org/en-US/docs/Web/API/window.btoa
* @param {Array} byteArray
* @returns {String}
*/
function uint8ArrayToBase64(byteArray) {
    var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    var total = byteArray.length;
    var encoded = "";
    for (var pos = 0; pos < total; pos += 3) {
        // Pack up to three bytes into 24 bits; bytes past the end read as
        // undefined, which the bitwise operators treat as 0
        var triple = (byteArray[pos] << 16) | (byteArray[pos + 1] << 8) | byteArray[pos + 2];
        // Emit four 6-bit groups, most significant first
        encoded += alphabet[triple >> 18 & 0x3f] +
            alphabet[triple >> 12 & 0x3f] +
            alphabet[triple >> 6 & 0x3f] +
            alphabet[triple & 0x3f];
    }
    // A trailing group of 1 or 2 bytes produced surplus characters: drop them
    // and pad with '=' up to a multiple of four characters
    var remainder = total % 3;
    var body = remainder > 0 ? encoded.slice(0, remainder - 3) : encoded;
    var padding = '==='.slice(remainder || 3);
    return body + padding;
}
/**
* Reads a Uint8Array from the given file starting at byte offset begin and
* for given size.
* @param {File} file
* @param {Integer} begin
* @param {Integer} size
* @returns {Promise} Promise
*/
function readFileSlice(file, begin, size) {
    var deferred = q.defer();
    var reader = new FileReader();
    // Errors and aborts both reject the promise with the originating event
    function rejectWithEvent(e) {
        deferred.reject(e);
    }
    reader.onabort = rejectWithEvent;
    reader.onerror = rejectWithEvent;
    // On success, expose the ArrayBuffer that was read as a Uint8Array
    reader.onload = function (e) {
        var bytes = new Uint8Array(e.target.result);
        deferred.resolve(bytes);
    };
    var end = begin + size;
    reader.readAsArrayBuffer(file.slice(begin, end));
    return deferred.promise;
}
/**
* Performs a binary search on indices begin <= i < end, utilizing query(i) to return where to
* continue the search.
* If lowerBound is not set, returns only indices where query returns 0 and null otherwise.
* If lowerBound is set, returns the smallest index where query does not return > 0.
* @param {Integer} begin
* @param {Integer} end
* @param query Function
* @param {Boolean} lowerBound
*/
function binarySearch(begin, end, query, lowerBound) {
    // Empty range: the result is returned directly (not wrapped in a promise)
    if (end <= begin) {
        return lowerBound ? begin : null;
    }
    var mid = Math.floor((begin + end) / 2);
    return query(mid).then(function (decision) {
        var nextBegin = begin;
        var nextEnd = end;
        if (decision == -1) {
            // Continue in the lower half, excluding mid
            nextEnd = mid;
        } else if (decision == 1) {
            // Continue in the upper half, excluding mid
            nextBegin = mid + 1;
        } else if (decision == -2) {
            // Shrink the range by one index from the top
            nextEnd = end - 1;
        } else if (decision == 2) {
            // Shrink the range by one index from the bottom
            nextBegin = begin + 1;
        } else {
            // Any other decision means mid is the answer
            return mid;
        }
        return binarySearch(nextBegin, nextEnd, query, lowerBound);
    });
}
/**
* Converts a Base64 Content to a Blob
* From https://stackoverflow.com/questions/16245767/creating-a-blob-from-a-base64-string-in-javascript
*
* @param {String} b64Data Base64-encoded data
* @param {String} contentType
* @param {Integer} sliceSize
* @returns {Blob}
*/
function b64toBlob(b64Data, contentType, sliceSize) {
    var mimeType = contentType || '';
    var chunkLength = sliceSize || 512;
    // atob yields a "binary string": one character per decoded byte
    var decoded = atob(b64Data);
    var chunks = [];
    var offset = 0;
    while (offset < decoded.length) {
        var piece = decoded.slice(offset, offset + chunkLength);
        // Copy this piece's character codes into a typed array of the same length
        var bytes = new Uint8Array(piece.length);
        for (var k = 0; k < piece.length; k++) {
            bytes[k] = piece.charCodeAt(k);
        }
        chunks.push(bytes);
        offset += chunkLength;
    }
    return new Blob(chunks, { type: mimeType });
}
/**
* Converts a UInt Array to a string, mapping each element to the character with that code (no UTF-8 decoding is performed)
* source : http://michael-rushanan.blogspot.de/2014/03/javascript-uint8array-hacks-and-cheat.html
*
* @param {UIntArray} uintArray
* @returns {String}
*/
function uintToString(uintArray) {
    // One character per element, taken as a character code
    var chars = [];
    for (var pos = 0; pos < uintArray.length; pos++) {
        chars.push(String.fromCharCode(uintArray[pos]));
    }
    return chars.join('');
}
/**
* Does a "left shift" on an integer.
* It is equivalent to int << bits (which works only on 32-bit integers),
* but compatible with 64-bit integers.
*
* @param {Integer} int
* @param {Integer} bits
* @returns {Integer}
*/
function leftShift(int, bits) {
    // Multiplying by 2^bits avoids the 32-bit truncation of the << operator
    var factor = Math.pow(2, bits);
    return int * factor;
}
/**
* Matches the outermost balanced constructs and their contents
* even if they have nested balanced constructs within them
* e.g., outer HTML of a pair of opening and closing tags.
* Left and right constructs must not be equal. Backreferences in right are not supported.
* Double backslash (\\) any backslash characters, e.g. '\\b'.
* If you do not set the global flag ('g'), then only contents of first construct will be returned.
* Adapted from http://blog.stevenlevithan.com/archives/javascript-match-recursive-regexp
* (c) 2007 Steven Levithan - MIT Licence https://opensource.org/licenses/MIT
*
* @param {string} str - String to be searched.
* @param {string} left - Regex string of opening pattern to match, e.g. '<div\\b[^>]*>' matches <div> or <div class="x">.
* @param {string} right - Regex string of closing pattern to match, e.g. '</div>'. Must not be equal to left.
* @param {string} flags - Regex flags, if any, such as 'gi' (= match globally and case insensitive).
* @returns {Array} An array of matches.
*/
function matchOuter(str, left, right, flags) {
    flags = flags || "";
    var f = flags.replace(/g/g, ""),        // caller's flags without 'g'
        g = flags.indexOf("g") > -1,        // whether every outermost construct is wanted
        l = new RegExp(left, f),            // recognises the caller's exact opening pattern
        //Creates a neutral middle value if left is a well-formed regex for an html tag with attributes
        mid = /^(<[^\\]+)\\/.test(left) ? left.replace(/^(<[^\\]+)[\S\s]+$/, "$1") + '\\b[^>]*>' : "",
        // Scanner that finds every opener (neutral form if available) or closer in turn
        x = new RegExp((mid ? mid : left) + "|" + right, "g" + f),
        a = [],                             // collected matches
        t,                                  // current nesting depth
        s,                                  // start index of the current outermost opener
        r,                                  // index just past the current outermost opener
        m;                                  // current scanner match
    // Once inside a construct, nested openers are recognised with the neutral pattern
    mid = mid ? new RegExp(mid, f) : l;
    do {
        t = 0;
        while ((m = x.exec(str)) !== null) {
            if ((t ? mid : l).test(m[0])) {
                // Opener: remember where the outermost one starts and ends
                if (!t++) {
                    s = m.index;
                    r = x.lastIndex;
                }
            } else if (t) {
                // Closer: when depth returns to zero an outermost construct is complete
                if (!--t) {
                    a.push(str.slice(s, x.lastIndex));
                    if (!g) return a;
                }
            }
        }
        // If input ended with an unclosed outermost opener (t > 0), rescan from just
        // AFTER that opener so constructs nested inside it can still be found.
        // Restarting at the opener's own start index would re-match the same opener
        // on every pass and never terminate.
    } while (t && (x.lastIndex = r));
    return a;
}
/**
* Matches the contents of innermost HTML elements even if they are nested inside other elements.
* Left and right strings must be opening and closing HTML or XML elements. Backreferences are not supported.
* If you do not set the global flag ('g'), then only contents of first match will be returned.
* Double backslash (\\) any backslash characters, e.g. '\\b'.
* Adapted from http://blog.stevenlevithan.com/archives/match-innermost-html-element
*
* @param {string} str - String to be searched.
* @param {string} left - Regex string of opening element to match, e.g. '<div\\b[^>]*>', must include '<' and '>'.
* @param {string} right - Regex string of closing element to match, e.g. '</div>'.
* @param {string} flags - Regex flags, if any, such as 'gi' (= match globally and case insensitive).
* @returns {Array} A RegExp array of matches.
*/
function matchInner(str, left, right, flags) {
    flags = flags || "";
    // Pattern: the opener, then lazily either a run of non-'<' text (captured in a
    // lookahead and consumed via \1 so it cannot be backtracked into) or a '<' that
    // does not begin another opener, then the closer. The opener's leading '<' is
    // removed for the negative lookahead because that '<' has already been consumed.
    var openerWithoutBracket = left.replace(/^</, "");
    var x = new RegExp(left + '(?:(?=([^<]+))\\1|<(?!' + openerWithoutBracket + '))*?' + right, flags);
    // With 'g' this is an array of whole matches; without it, a single match
    // array (with capture groups); null when nothing matches
    return str.match(x);
}
// Defines an object and functions for searching and highlighting text in a node
//
// Original JavaScript code by Chirp Internet: www.chirp.com.au
// Please acknowledge use of this code by including this header.
// For documentation see: http://www.the-art-of-web.com/javascript/search-highlight/
/**
 * Constructor for an object that highlights search words inside a DOM node.
 *
 * @param {Node} node Node to search in; document.body is highlighted if omitted
 * @param {String} tag Name of the element used to wrap matches (defaults to "EM")
 */
function Hilitor(node, tag) {
    var targetNode = node || document.body;
    var hiliteTag = tag || "EM";
    var skipTags = new RegExp("^(?:" + hiliteTag + "|SCRIPT|FORM)$");
    var colors = ["#ff6", "#a0ffff", "#9f9", "#f99", "#f6f"];
    var className = "hilitor";
    // Dictionary of lower-cased word -> colour. It must not be an Array or a plain
    // object: a search word such as "length" or "constructor" would otherwise hit a
    // built-in property (assigning a colour to an Array's length throws a RangeError).
    var wordColor = Object.create(null);
    var colorIdx = 0;
    var matchRegex = "";
    //Add any symbols that may prefix a start string and don't match \b (word boundary)
    var leadingSymbols = "‘'\"“([«¿¡!*-";
    // Match-type state read by setRegex; both false means whole-word matching
    this.openLeft = false;
    this.openRight = false;

    /**
     * Selects how words are anchored:
     * "left" = word must start at a boundary, "right" = word must end at a boundary,
     * "open" = no anchoring, anything else = anchored on both sides.
     * Takes effect on the next call to setRegex.
     * @param {String} type
     */
    this.setMatchType = function (type) {
        switch (type) {
            case "left":
                this.openLeft = false;
                this.openRight = true;
                break;
            case "right":
                this.openLeft = true;
                this.openRight = false;
                break;
            case "open":
                this.openLeft = this.openRight = true;
                break;
            default:
                this.openLeft = this.openRight = false;
        }
    };

    /**
     * Turns a search string (in which non-ASCII characters have been escaped as
     * \uXXXX by convertCharStr2jEsc) into a regex source that also matches the
     * accented variants of its letters.
     * @param {String} input
     * @returns {String}
     */
    function addAccents(input) {
        var retval = input;
        // Fold escaped accented characters down to their base letters...
        retval = retval.replace(/([ao])e/ig, "$1");
        retval = retval.replace(/\\u00[CE][0124]/ig, "a");
        retval = retval.replace(/\\u00E7/ig, "c");
        retval = retval.replace(/\\u00E[89AB]|\\u00C[9A]/ig, "e");
        retval = retval.replace(/\\u00[CE][DEF]/ig, "i");
        retval = retval.replace(/\\u00[DF]1/ig, "n");
        retval = retval.replace(/\\u00[FD][346]/ig, "o");
        retval = retval.replace(/\\u00F[9BC]/ig, "u");
        retval = retval.replace(/\\u00FF/ig, "y");
        retval = retval.replace(/\\u00DF/ig, "s");
        // ...then expand each base letter into a class of its variants. The order
        // matters: a later replacement must not rewrite letters inserted by an earlier one.
        retval = retval.replace(/'/ig, "['’‘]");
        retval = retval.replace(/c/ig, "[cç]");
        retval = retval.replace(/e/ig, "[eèéêë]");
        retval = retval.replace(/a/ig, "([aàâäá]|ae)");
        retval = retval.replace(/i/ig, "[iîïíì]");
        retval = retval.replace(/n/ig, "[nñ]");
        retval = retval.replace(/o/ig, "([oôöó]|oe)");
        retval = retval.replace(/u/ig, "[uùûüú]");
        retval = retval.replace(/y/ig, "[yÿ]");
        retval = retval.replace(/s/ig, "(ss|[sß])");
        return retval;
    }

    /**
     * Builds the regex used by hiliteWords from a whitespace-separated list of words.
     * @param {String} input
     * @returns {Boolean} true if a regex was built, false if nothing searchable remained
     */
    this.setRegex = function (input) {
        // Drop backslashes that do not introduce a \u escape
        input = input.replace(/\\([^u]|$)/g, "$1");
        // Keep only word characters, backslashes, whitespace and apostrophes; words become alternatives
        input = input.replace(/[^\w\\\s']+/g, "").replace(/\s+/g, "|");
        input = input.replace(/^\||\|$/g, "");
        input = addAccents(input);
        if (input) {
            var re = "(" + input + ")";
            if (!this.openLeft) re = "(?:^|[\\b\\s" + leadingSymbols + "])" + re;
            if (!this.openRight) re = re + "(?:[\\b\\s]|$)";
            matchRegex = new RegExp(re, "i");
            return true;
        }
        return false;
    };

    /**
     * Returns the source of the current regex with the delimiters, the "i" flag
     * and the non-capturing anchor groups removed.
     * @returns {String}
     */
    this.getRegex = function () {
        var retval = matchRegex.toString();
        retval = retval.replace(/(^\/|\(\?:[^\)]+\)|\/i$)/g, "");
        return retval;
    };

    /**
     * Counts case-insensitive occurrences of the whole input phrase in the text of
     * the node, ignoring the <title> element, markup and punctuation.
     * @param {String} input Phrase to look for (used as regex source)
     * @returns {Integer} Number of matches (undefined if no node was supplied)
     */
    this.countFullMatches = function (input) {
        if (node === undefined || !node) return;
        // Remove the title element together with its text
        var strippedText = node.innerHTML.replace(/<title[^>]*>[\s\S]*<\/title>/i, "");
        // Replace remaining tags with a space (working on the title-stripped text,
        // not on a fresh copy of innerHTML)
        strippedText = strippedText.replace(/<[^>]*>\s*/g, " ");
        if (!strippedText) return 0;
        // Treat non-breaking-space entities, spaces, line breaks and punctuation as separators
        strippedText = strippedText.replace(/(?:&nbsp;| |\r?\n|[.,;:?!¿¡-])+/g, " ");
        strippedText = strippedText.replace(/\s+/g, " ");
        if (!strippedText.length) return 0;
        input = input.replace(/[\s.,;:?!¿¡-]+/g, " ");
        // NOTE(review): input is used as regex source without escaping, so characters
        // such as "(" in a search phrase will make the RegExp constructor throw
        var inputMatcher = new RegExp(input, "ig");
        var matches = strippedText.match(inputMatcher);
        return matches ? matches.length : 0;
    };

    /**
     * Counts the highlight elements currently present in the node's HTML.
     * @returns {Integer} Number of highlights (undefined if no node was supplied)
     */
    this.countPartialMatches = function () {
        if (node === undefined || !node) return;
        var matches = matchInner(node.innerHTML, '<' + hiliteTag + '\\b[^>]*class="hilitor"[^>]*>', '</' + hiliteTag + '>', 'gi');
        return matches ? matches.length : 0;
    };

    this.scrollFrom = 0;
    this.lastScrollValue = "";

    /**
     * Scrolls to the first run of consecutive highlights, at or after index
     * scrollFrom, whose joined text matches the whole input phrase.
     * Relies on jQuery ($) and on an element with id "articleContent".
     * @param {String} input Phrase to look for
     * @param {Integer} scrollFrom Index of the highlight to start from (optional)
     * @returns {Integer} Index reached plus the number of words in input
     */
    this.scrollToFullMatch = function (input, scrollFrom) {
        if (node === undefined || !node) return;
        if (!input) return;
        //Normalize spaces
        input = input.replace(/\s+/g, " ");
        var inputWords = input.split(" ");
        var testInput = new RegExp(addAccents(input), "i");
        var hilitedNodes = node.getElementsByClassName(className);
        var subNodes = [];
        var start = scrollFrom || 0;
        start = start >= hilitedNodes.length ? 0 : start;
        var end = start + inputWords.length;
        // f is declared outside the inner loop because it is read after that loop ends
        var f;
        for (start; start < hilitedNodes.length; start++) {
            for (f = start; f < end; f++) {
                if (f == hilitedNodes.length) break;
                subNodes.push(hilitedNodes[f].innerHTML);
            }
            var nodeText = subNodes.join(" ");
            if (testInput.test(nodeText)) {
                $("#articleContent").contents().scrollTop($(hilitedNodes[start]).offset().top - window.innerHeight / 3);
                break;
            } else {
                if (f == hilitedNodes.length && scrollFrom > 0) {
                    //Restart search from top of page
                    // NOTE(review): the loop increment then moves start to 1 and end is
                    // not reset, so index 0 is skipped after a restart - confirm intent
                    scrollFrom = 0;
                    start = 0;
                }
            }
            subNodes = [];
            end++;
        }
        return start + inputWords.length;
    };

    /**
     * Recursively wraps the first match of the current regex in each text node
     * below the given node in a coloured highlight element.
     * @param {Node} node
     */
    this.hiliteWords = function (node) {
        if (node === undefined || !node) return;
        if (!matchRegex) return;
        if (skipTags.test(node.nodeName)) return;
        if (node.hasChildNodes()) {
            // childNodes is re-read on every pass: text nodes created by a split
            // below are visited too, so later matches in the same text are found
            for (var i = 0; i < node.childNodes.length; i++)
                this.hiliteWords(node.childNodes[i]);
        }
        if (node.nodeType == 3) { // NODE_TEXT
            var nv, regs;
            if ((nv = node.nodeValue) && (regs = matchRegex.exec(nv))) {
                var key = regs[1].toLowerCase();
                // Each distinct word gets the next colour of the palette, cycling
                if (!wordColor[key]) {
                    wordColor[key] = colors[colorIdx++ % colors.length];
                }
                var match = document.createElement(hiliteTag);
                match.appendChild(document.createTextNode(regs[1]));
                match.style.setProperty("background-color", wordColor[key], "important");
                match.style.fontStyle = "inherit";
                match.style.color = "#000";
                match.className = className;
                var after;
                // In case of leading whitespace or other symbols
                var leadR = new RegExp("^[\\s" + leadingSymbols + "]");
                if (leadR.test(regs[0])) {
                    after = node.splitText(regs.index + 1);
                } else {
                    after = node.splitText(regs.index);
                }
                // Remove the matched word from the trailing text and insert the highlight before it
                after.nodeValue = after.nodeValue.substring(regs[1].length);
                node.parentNode.insertBefore(match, after);
            }
        }
    };

    /**
     * Removes every highlight element below the target node, putting its
     * first child back in its place.
     */
    this.remove = function () {
        // Operates on targetNode (the node that apply() highlights) so that this
        // also works when the constructor was called without a node
        if (typeof targetNode.innerHTML == "undefined") return;
        var arr = targetNode.getElementsByClassName(className), el;
        while (arr.length && (el = arr[0])) {
            var parent = el.parentNode;
            parent.replaceChild(el.firstChild, el);
            parent.normalize();
        }
    };

    /**
     * Replaces any existing highlighting with highlighting for the given words.
     * @param {String} input Whitespace-separated search words
     */
    this.apply = function (input) {
        this.remove();
        if (input === undefined || !(input = input.replace(/(^\s+|\s+$)/g, ""))) return;
        input = convertCharStr2jEsc(input);
        if (this.setRegex(input)) {
            this.hiliteWords(targetNode);
        }
    };

    // added by Yanosh Kunsh to include utf-8 string comparison
    /**
     * Formats the low 16 bits of a number as four upper-case hex digits.
     * @param {Integer} textString
     * @returns {String}
     */
    function dec2hex4(textString) {
        var hexequiv = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"];
        return hexequiv[(textString >> 12) & 0xF] + hexequiv[(textString >> 8) & 0xF] + hexequiv[(textString >> 4) & 0xF] + hexequiv[textString & 0xF];
    }

    /**
     * Converts a string of characters to JavaScript escapes.
     * Printable ASCII is kept as is; everything else becomes an escape sequence.
     * @param {String} str Sequence of Unicode characters
     * @param {Boolean} cstyle If true, supplementary characters are written as
     *        \UXXXXXXXX instead of as a pair of \uXXXX surrogates
     * @returns {String}
     */
    function convertCharStr2jEsc(str, cstyle) {
        var highsurrogate = 0;
        var suppCP;
        var pad;
        var outputString = '';
        for (var i = 0; i < str.length; i++) {
            var cc = str.charCodeAt(i);
            if (cc < 0 || cc > 0xFFFF) {
                outputString += '!Error in convertCharStr2UTF16: unexpected charCodeAt result, cc=' + cc + '!';
            }
            if (highsurrogate != 0) { // this is a supp char, and cc contains the low surrogate
                if (0xDC00 <= cc && cc <= 0xDFFF) {
                    suppCP = 0x10000 + ((highsurrogate - 0xD800) << 10) + (cc - 0xDC00);
                    if (cstyle) {
                        pad = suppCP.toString(16);
                        while (pad.length < 8) {
                            pad = '0' + pad;
                        }
                        outputString += '\\U' + pad;
                    } else {
                        suppCP -= 0x10000;
                        outputString += '\\u' + dec2hex4(0xD800 | (suppCP >> 10)) + '\\u' + dec2hex4(0xDC00 | (suppCP & 0x3FF));
                    }
                    highsurrogate = 0;
                    continue;
                } else {
                    outputString += 'Error in convertCharStr2UTF16: low surrogate expected, cc=' + cc + '!';
                    highsurrogate = 0;
                }
            }
            if (0xD800 <= cc && cc <= 0xDBFF) { // start of supplementary character
                highsurrogate = cc;
            } else { // this is a BMP character
                switch (cc) {
                    case 0:
                        outputString += '\\0';
                        break;
                    case 8:
                        outputString += '\\b';
                        break;
                    case 9:
                        outputString += '\\t';
                        break;
                    case 10:
                        outputString += '\\n';
                        break;
                    case 13:
                        outputString += '\\r';
                        break;
                    case 11:
                        outputString += '\\v';
                        break;
                    case 12:
                        outputString += '\\f';
                        break;
                    case 34:
                        outputString += '\\\"';
                        break;
                    case 39:
                        outputString += '\\\'';
                        break;
                    case 92:
                        outputString += '\\\\';
                        break;
                    default:
                        if (cc > 0x1f && cc < 0x7F) {
                            outputString += String.fromCharCode(cc);
                        } else {
                            pad = cc.toString(16).toUpperCase();
                            while (pad.length < 4) {
                                pad = '0' + pad;
                            }
                            outputString += '\\u' + pad;
                        }
                }
            }
        }
        return outputString;
    }
}
// Does a forward search for el forward through next siblings, or up the DOM tree then forward
// Returns if fn(el) is true, or else keeps searching to end of DOM
function getClosestForward(el, fn) {
    // Walk el, its following siblings, then (once siblings run out) the parent and
    // the parent's following siblings, and so on up the tree. This visits nodes in
    // the same order as a recursive "sibling chain, else parent" search, but tests
    // each node once instead of re-searching the ancestor chain for every sibling.
    var current = el;
    while (current) {
        if (fn(current)) return current;
        current = current.nextElementSibling || current.parentNode;
    }
    // Nothing matched: a falsy value (null/undefined) is returned
    return current;
}
// Does a reverse search for el back through previous siblings, or up the DOM tree then back
// Returns if fn(el) is true, or else keeps searching to top of DOM
function getClosestBack(el, fn) {
    // Walk el, its preceding siblings, then (once siblings run out) the parent and
    // the parent's preceding siblings, and so on up the tree. This visits nodes in
    // the same order as a recursive "sibling chain, else parent" search, but tests
    // each node once instead of re-searching the ancestor chain for every sibling.
    var current = el;
    while (current) {
        if (fn(current)) return current;
        current = current.previousElementSibling || current.parentNode;
    }
    // Nothing matched: a falsy value (null/undefined) is returned
    return current;
}
/**
* Functions and classes exposed by this module.
* This object is the return value of the factory function passed to define(),
* so each key below is the name under which the corresponding function
* declared in this file is made available to code that requires this module.
*/
return {
endsWith: endsWith,
ucFirstLetter: ucFirstLetter,
lcFirstLetter: lcFirstLetter,
ucEveryFirstLetter: ucEveryFirstLetter,
removeDuplicateTitlesInDirEntryArray: removeDuplicateTitlesInDirEntryArray,
removeDuplicateStringsInSmallArray: removeDuplicateStringsInSmallArray,
readIntegerFrom4Bytes: readIntegerFrom4Bytes,
readIntegerFrom2Bytes: readIntegerFrom2Bytes,
readFloatFrom4Bytes: readFloatFrom4Bytes,
uint8ArrayToHex: uint8ArrayToHex,
uint8ArrayToBase64: uint8ArrayToBase64,
readFileSlice: readFileSlice,
binarySearch: binarySearch,
b64toBlob: b64toBlob,
uintToString: uintToString,
leftShift: leftShift,
matchOuter: matchOuter,
matchInner: matchInner,
// Hilitor is a constructor: instantiate it with "new"
Hilitor: Hilitor,
getClosestForward: getClosestForward,
getClosestBack: getClosestBack
};
});