This commit is contained in:
mhutti1 2016-10-15 15:33:46 +01:00
commit d326603662
2 changed files with 537 additions and 221 deletions

72
src/server/benchmark.sh Executable file
View File

@ -0,0 +1,72 @@
#!/bin/bash
# Parse command line
zim=$1
serve=$2
if (( $# != 2 ))
then
echo "Usage: ./benchmark ZIM_PATH KIWIX-SERVE_PATH"
exit 1
fi
# Constants
delay=0.01
top_log=/tmp/top.log
top_calc=/tmp/top.calc
# Available memory
memory_kb=`cat /proc/meminfo | grep "MemTotal" | tr -d " " | cut -f2 -d: | sed -e 's/[^0123456789]//g'`
memory_mb=`echo "scale=2;$memory_kb/1024" | bc -l`
# Start Kiwix-serve
echo "Starting kiwix-serve to be tested..."
$serve --port=8080 "$zim" &
serve_pid=$!
# Start top to monitor resource usage
top -d $delay -b -p $serve_pid | grep $serve_pid > "$top_log" &
top_pid=$!
# Print environnement informations
echo "Process to monitor: $serve_pid"
echo "Monitoring process: $top_pid"
echo "Monitoring delay: $delay s"
echo "ZIM file path: $zim"
echo "Kiwix-serve path: $serve"
echo "Total memory: $memory_mb MB"
echo
# Compute artice list
echo "Computing article list snippet..."
articles=`zimdump -l $zim | grep ".html" | shuf | head -n 1000`
# Run wget against kiwix-serve
start_date=`date +%s`
for LINE in $articles
do
echo "Scrapping $LINE..."
wget --quiet p -P /dev/shm/tmp "http://localhost:8080/wikipedia_en_medicine_2016-09/A/$LINE"
rm -rf /dev/shm/tmp
done
end_date=`date +%s`
# Kill top instance
kill -s STOP $top_pid 2>&1 > /dev/null
sed -i '$ d' "$top_log"
times=`cat "$top_log" | wc -l`
# Compute KPI
duration=`echo "$end_date-$start_date" | bc -l`
cpu_percent=`cat "$top_log" | sed -r -e "s;\s\s*; ;g" -e "s;^ *;;" | cut -d' ' -f9 | tr '\n' '+0' | sed -r -e "s;(.*)[+]$;\1;" -e "s/.*/scale=2\n(&)\/$times\nquit\n/" > "$top_calc" ; bc -q "$top_calc"`
memory_percent=`cat "$top_log" | sed -r -e "s;\s\s*; ;g" -e "s;^ *;;" | cut -d' ' -f10 | tr '\n' '+' | sed -r -e "s;(.*)[+]$;\1;" -e "s/.*/scale=2\n(&)\/$times\nquit\n/" > "$top_calc" ; bc -q "$top_calc"`
memory_absolut=`echo "scale=2;$memory_mb/100*$memory_percent" | bc -l`
echo
echo "Measure count: $times"
echo "Duration: $duration s"
echo "CPU (average): $cpu_percent %"
echo "Memory (average): $memory_absolut MB"
# Kill kiwix-serve & top instances
kill -s STOP $serve_pid 2>&1 > /dev/null
#rm "$top_log" "$top_calc"

View File

@ -144,6 +144,420 @@ bool isVerbose() {
static Bytef *compr = (Bytef *)malloc(COMPRESSOR_BUFFER_SIZE);
static uLongf comprLen;
static
bool compress_content(string &content,
const string &mimeType)
{
/* Compute the lengh */
unsigned int contentLength = content.size();
/* Should be deflate */
bool deflated =
contentLength > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE &&
contentLength < COMPRESSOR_BUFFER_SIZE &&
(mimeType.find("text/") != string::npos ||
mimeType.find("application/javascript") != string::npos ||
mimeType.find("application/json") != string::npos);
/* Compress the content if necessary */
if (deflated) {
pthread_mutex_lock(&compressorLock);
comprLen = COMPRESSOR_BUFFER_SIZE;
compress(compr, &comprLen, (const Bytef*)(content.data()), contentLength);
if (comprLen > 2 && comprLen < contentLength) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
compr += 2;
content = string((char *)compr, comprLen);
contentLength = comprLen;
} else {
deflated = false;
}
pthread_mutex_unlock(&compressorLock);
}
return deflated;
}
static
struct MHD_Response* build_response(const void* data,
unsigned int length,
const std::string& httpRedirection,
const std::string& mimeType,
bool deflated,
bool cacheEnabled)
{
/* Create the response */
struct MHD_Response * response = MHD_create_response_from_data(length,
const_cast<void*>(data),
MHD_NO,
MHD_YES);
/* Make a redirection if necessary otherwise send the content */
if (!httpRedirection.empty()) {
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, httpRedirection.c_str());
} else {
/* Add if necessary the content-encoding */
if (deflated) {
MHD_add_response_header(response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
/* Tell the client that byte ranges are accepted */
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
/* Specify the mime type */
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());
}
/* Force to close the connection - cf. 100% CPU usage with v. 4.4 (in Lucid) */
//MHD_add_response_header(response, MHD_HTTP_HEADER_CONNECTION, "close");
/* Allow cross-domain requests */
//MHD_add_response_header(response, MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
if (cacheEnabled) { /* Force cache */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
} else { /* Prevent cache (for random page) */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "no-cache, no-store, must-revalidate");
}
return response;
}
ssize_t callback_reader_from_blob(void *cls,
uint64_t pos,
char *buf,
size_t max)
{
zim::Blob* blob = static_cast<zim::Blob*>(cls);
pthread_mutex_lock(&readerLock);
size_t max_size_to_set = min(max, blob->size()-pos);
if (max_size_to_set <= 0)
{
pthread_mutex_unlock(&readerLock);
return MHD_CONTENT_READER_END_WITH_ERROR;
}
memcpy(buf, blob->data()+pos, max_size_to_set);
pthread_mutex_unlock(&readerLock);
return max_size_to_set;
}
void callback_free_blob(void *cls)
{
zim::Blob* blob = static_cast<zim::Blob*>(cls);
pthread_mutex_lock(&readerLock);
delete blob;
pthread_mutex_unlock(&readerLock);
}
static
struct MHD_Response* build_callback_response_from_blob(zim::Blob& blob,
const std::string& mimeType)
{
pthread_mutex_lock(&readerLock);
zim::Blob* p_blob = new zim::Blob(blob);
struct MHD_Response * response = MHD_create_response_from_callback(blob.size(),
16384,
callback_reader_from_blob,
p_blob,
callback_free_blob);
pthread_mutex_unlock(&readerLock);
/* Tell the client that byte ranges are accepted */
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
/* Specify the mime type */
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());
/* Allow cross-domain requests */
//MHD_add_response_header(response, MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
return response;
}
static
struct MHD_Response* handle_suggest(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string content;
std::string mimeType;
unsigned int maxSuggestionCount = 10;
unsigned int suggestionCount = 0;
std::string suggestion;
/* Get the suggestion pattern from the HTTP request */
const char* cTerm = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "term");
std::string term = cTerm == NULL ? "" : cTerm;
if (isVerbose()) {
std::cout << "Searching suggestions for: \"" << term << "\"" << endl;
}
/* Get the suggestions */
content = "[";
reader->searchSuggestionsSmart(term, maxSuggestionCount);
while (reader->getNextSuggestion(suggestion)) {
kiwix::stringReplacement(suggestion, "\"", "\\\"");
content += (content == "[" ? "" : ",");
content += "{\"value\":\"" + suggestion + "\",\"label\":\"" + suggestion + "\"}";
suggestionCount++;
}
/* Propose the fulltext search if possible */
if (searcher != NULL) {
content += (suggestionCount == 0 ? "" : ",");
content += "{\"value\":\"" + std::string(term) + " \", \"label\":\"containing '" + std::string(term) + "'...\"}";
}
content += "]";
mimeType = "application/json; charset=utf-8";
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
static
struct MHD_Response* handle_skin(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string content = getResourceAsString(urlStr.substr(6));
std::string mimeType = getMimeTypeForFile(urlStr);
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
static
struct MHD_Response* handle_search(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string content;
std::string mimeType;
std::string httpRedirection;
/* Retrieve the pattern to search */
const char* pattern = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "pattern");
std::string patternString = kiwix::urlDecode(pattern == NULL ? "" : string(pattern));
std::string patternCorrespondingUrl;
/* Try first to load directly the article */
if (reader != NULL) {
std::vector<std::string> variants = reader->getTitleVariants(patternString);
std::vector<std::string>::iterator variantsItr = variants.begin();
pthread_mutex_lock(&readerLock);
while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) {
reader->getPageUrlFromTitle(*variantsItr, patternCorrespondingUrl);
variantsItr++;
}
pthread_mutex_unlock(&readerLock);
/* If article found then redirect directly to it */
if (!patternCorrespondingUrl.empty()) {
httpRedirection = "/" + humanReadableBookId + "/" + patternCorrespondingUrl;
httpResponseCode = MHD_HTTP_FOUND;
return build_response("", 0, httpRedirection, "", false, true);
}
}
/* Make the search */
if (patternCorrespondingUrl.empty() && searcher != NULL) {
const char* start = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "start");
const char* end = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "end");
unsigned int startNumber = start != NULL ? atoi(start) : 0;
unsigned int endNumber = end != NULL ? atoi(end) : 25;
/* Get the results */
pthread_mutex_lock(&searcherLock);
try {
searcher->search(patternString, startNumber, endNumber, isVerbose());
content = searcher->getHtml();
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
}
pthread_mutex_unlock(&searcherLock);
} else {
content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Fulltext search unavailable</title></head><body><h1>Not Found</h1><p>There is no article with the title <b>\"" + kiwix::encodeDiples(patternString) + "\"</b> and the fulltext search engine is not available for this content.</p></body></html>";
httpResponseCode = MHD_HTTP_NOT_FOUND;
}
mimeType = "text/html; charset=utf-8";
introduceTaskbar(content, humanReadableBookId);
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), httpRedirection, mimeType, deflated, true);
}
static
struct MHD_Response* handle_random(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string httpRedirection;
bool cacheEnabled = false;
httpResponseCode = MHD_HTTP_FOUND;
if (reader != NULL) {
pthread_mutex_lock(&readerLock);
std::string randomUrl = reader->getRandomPageUrl();
pthread_mutex_unlock(&readerLock);
httpRedirection = "/" + humanReadableBookId + "/" + kiwix::urlEncode(randomUrl);
}
return build_response("", 0, httpRedirection, "", false, false);
}
static
struct MHD_Response* handle_content(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string baseUrl;
std::string content;
std::string mimeType;
unsigned int contentLength;
bool found = false;
zim::Article article;
pthread_mutex_lock(&readerLock);
try {
found = reader->getArticleObjectByDecodedUrl(urlStr, article);
if (found) {
/* If redirect */
unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) {
article = article.getRedirectArticle();
}
/* To many loop */
if (loopCounter == 42)
found = false;
}
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
found = false;
}
pthread_mutex_unlock(&readerLock);
if (!found) {
if (isVerbose())
cout << "Failed to find " << urlStr << endl;
content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Content not found</title></head><body><h1>Not Found</h1><p>The requested URL \"" + urlStr + "\" was not found on this server.</p></body></html>";
mimeType = "text/html";
httpResponseCode = MHD_HTTP_NOT_FOUND;
introduceTaskbar(content, humanReadableBookId);
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, false);
}
try {
pthread_mutex_lock(&readerLock);
mimeType = article.getMimeType();
pthread_mutex_unlock(&readerLock);
} catch (exception &e) {
mimeType = "application/octet-stream";
}
if (isVerbose()) {
cout << "Found " << urlStr << endl;
cout << "mimeType: " << mimeType << endl;
}
pthread_mutex_lock(&readerLock);
zim::Blob raw_content = article.getData();
pthread_mutex_unlock(&readerLock);
if (mimeType.find("text/") != string::npos ||
mimeType.find("application/javascript") != string::npos ||
mimeType.find("application/json") != string::npos)
{
pthread_mutex_lock(&readerLock);
content = string(raw_content.data(), raw_content.size());
pthread_mutex_unlock(&readerLock);
/* Special rewrite URL in case of ZIM file use intern *asbolute* url like /A/Kiwix */
if (mimeType.find("text/html") != string::npos) {
if (content.find("<body") == std::string::npos &&
content.find("<BODY") == std::string::npos) {
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
}
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(href|src)(=[\"|\']{0,1}/)([A-Z|\\-])/");
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(@import[ ]+)([\"|\']{0,1}/)([A-Z|\\-])/");
content = replaceRegex(content,
"<head><base href=\"/" + humanReadableBookId + baseUrl + "\" />",
"<head>");
introduceTaskbar(content, humanReadableBookId);
} else if (mimeType.find("text/css") != string::npos) {
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(url|URL)(\\([\"|\']{0,1}/)([A-Z|\\-])/");
}
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
else
{
return build_callback_response_from_blob(raw_content, mimeType);
}
}
static
struct MHD_Response* handle_default(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
pthread_mutex_lock(&welcomeLock);
std::string content = welcomeHTML;
pthread_mutex_unlock(&welcomeLock);
std::string mimeType = "text/html; charset=utf-8";
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
static int accessHandlerCallback(void *cls,
struct MHD_Connection * connection,
const char * url,
@ -151,8 +565,8 @@ static int accessHandlerCallback(void *cls,
const char * version,
const char * upload_data,
size_t * upload_data_size,
void ** ptr) {
void ** ptr)
{
/* Unexpected method */
if (0 != strcmp(method, "GET") && 0 != strcmp(method, "POST"))
return MHD_NO;
@ -164,28 +578,24 @@ static int accessHandlerCallback(void *cls,
return MHD_YES;
}
/* clear context pointer */
*ptr = NULL;
/* Debug */
if (isVerbose()) {
std::cout << "Requesting " << url << std::endl;
}
/* Check if the response can be compressed */
const string acceptEncodingHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) ?
MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) : "";
const bool acceptEncodingDeflate = !acceptEncodingHeaderValue.empty() && acceptEncodingHeaderValue.find("deflate") != string::npos;
const char* acceptEncodingHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING);
const bool acceptEncodingDeflate = acceptEncodingHeaderValue && string(acceptEncodingHeaderValue).find("deflate") != string::npos;
/* Check if range is requested */
const string acceptRangeHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_RANGE) ?
MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_RANGE) : "";
const bool acceptRange = !acceptRangeHeaderValue.empty();
const char* acceptRangeHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_RANGE);
const bool acceptRange = acceptRangeHeaderValue != NULL;
/* Prepare the variables */
struct MHD_Response *response;
std::string content;
std::string mimeType;
std::string httpRedirection;
unsigned int contentLength = 0;
bool cacheEnabled = true;
int httpResponseCode = MHD_HTTP_OK;
std::string urlStr = string(url);
@ -217,234 +627,68 @@ static int accessHandlerCallback(void *cls,
/* Get suggestions */
if (!strcmp(url, "/suggest") && reader != NULL) {
unsigned int maxSuggestionCount = 10;
unsigned int suggestionCount = 0;
std::string suggestion;
/* Get the suggestion pattern from the HTTP request */
const char* cTerm = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "term");
std::string term = cTerm == NULL ? "" : cTerm;
if (isVerbose()) {
std::cout << "Searching suggestions for: \"" << term << "\"" << endl;
}
/* Get the suggestions */
content = "[";
reader->searchSuggestionsSmart(term, maxSuggestionCount);
while (reader->getNextSuggestion(suggestion)) {
kiwix::stringReplacement(suggestion, "\"", "\\\"");
content += (content == "[" ? "" : ",");
content += "{\"value\":\"" + suggestion + "\",\"label\":\"" + suggestion + "\"}";
suggestionCount++;
}
/* Propose the fulltext search if possible */
if (searcher != NULL) {
content += (suggestionCount == 0 ? "" : ",");
content += "{\"value\":\"" + std::string(term) + " \", \"label\":\"containing '" + std::string(term) + "'...\"}";
}
content += "]";
mimeType = "application/json; charset=utf-8";
response = handle_suggest(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Get static skin stuff */
else if (urlStr.substr(0, 6) == "/skin/") {
content = getResourceAsString(urlStr.substr(6));
mimeType = getMimeTypeForFile(urlStr);
response = handle_skin(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Display the search restults */
else if (!strcmp(url, "/search")) {
/* Retrieve the pattern to search */
const char* pattern = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "pattern");
std::string patternString = kiwix::urlDecode(pattern == NULL ? "" : string(pattern));
std::string patternCorrespondingUrl;
/* Try first to load directly the article */
if (reader != NULL) {
std::vector<std::string> variants = reader->getTitleVariants(patternString);
std::vector<std::string>::iterator variantsItr = variants.begin();
pthread_mutex_lock(&readerLock);
while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) {
reader->getPageUrlFromTitle(*variantsItr, patternCorrespondingUrl);
variantsItr++;
}
pthread_mutex_unlock(&readerLock);
/* If article found then redirect directly to it */
if (!patternCorrespondingUrl.empty()) {
httpRedirection = "/" + humanReadableBookId + "/" + patternCorrespondingUrl;
}
}
/* Make the search */
if (patternCorrespondingUrl.empty() && searcher != NULL) {
const char* start = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "start");
const char* end = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "end");
unsigned int startNumber = start != NULL ? atoi(start) : 0;
unsigned int endNumber = end != NULL ? atoi(end) : 25;
/* Get the results */
pthread_mutex_lock(&searcherLock);
try {
searcher->search(patternString, startNumber, endNumber, isVerbose());
content = searcher->getHtml();
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
}
pthread_mutex_unlock(&searcherLock);
} else {
content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Fulltext search unavailable</title></head><body><h1>Not Found</h1><p>There is no article with the title <b>\"" + kiwix::encodeDiples(patternString) + "\"</b> and the fulltext search engine is not available for this content.</p></body></html>";
httpResponseCode = MHD_HTTP_NOT_FOUND;
}
mimeType = "text/html; charset=utf-8";
response = handle_search(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Display a random article */
else if (!strcmp(url, "/random")) {
cacheEnabled = false;
if (reader != NULL) {
pthread_mutex_lock(&readerLock);
std::string randomUrl = reader->getRandomPageUrl();
pthread_mutex_unlock(&readerLock);
httpRedirection = "/" + humanReadableBookId + "/" + kiwix::urlEncode(randomUrl);
}
response = handle_random(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Display the content of a ZIM content (article, image, ...) */
else if (reader != NULL) {
std::string baseUrl;
try {
pthread_mutex_lock(&readerLock);
bool found = reader->getContentByDecodedUrl(urlStr, content, contentLength, mimeType, baseUrl);
pthread_mutex_unlock(&readerLock);
if (found) {
if (isVerbose()) {
cout << "Found " << urlStr << endl;
cout << "content size: " << contentLength << endl;
cout << "mimeType: " << mimeType << endl;
}
} else {
if (isVerbose())
cout << "Failed to find " << urlStr << endl;
content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Content not found</title></head><body><h1>Not Found</h1><p>The requested URL \"" + urlStr + "\" was not found on this server.</p></body></html>";
mimeType = "text/html";
httpResponseCode = MHD_HTTP_NOT_FOUND;
}
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
}
/* Special rewrite URL in case of ZIM file use intern *asbolute* url like /A/Kiwix */
if (mimeType.find("text/html") != string::npos) {
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(href|src)(=[\"|\']{0,1}/)([A-Z|\\-])/");
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(@import[ ]+)([\"|\']{0,1}/)([A-Z|\\-])/");
content = replaceRegex(content,
"<head><base href=\"/" + humanReadableBookId + baseUrl + "\" />",
"<head>");
} else if (mimeType.find("text/css") != string::npos) {
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(url|URL)(\\([\"|\']{0,1}/)([A-Z|\\-])/");
}
response = handle_content(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Display the global Welcome page */
else {
pthread_mutex_lock(&welcomeLock);
content = welcomeHTML;
pthread_mutex_unlock(&welcomeLock);
mimeType = "text/html; charset=utf-8";
}
/* Introduce Taskbar */
if (!humanReadableBookId.empty() && mimeType.find("text/html") != string::npos) {
introduceTaskbar(content, humanReadableBookId);
}
/* Compute the lengh */
contentLength = content.size();
/* Should be deflate */
bool deflated =
contentLength > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE &&
contentLength < COMPRESSOR_BUFFER_SIZE &&
acceptEncodingDeflate &&
(mimeType.find("text/") != string::npos ||
mimeType.find("application/javascript") != string::npos ||
mimeType.find("application/json") != string::npos);
/* Compress the content if necessary */
if (deflated) {
pthread_mutex_lock(&compressorLock);
comprLen = COMPRESSOR_BUFFER_SIZE;
compress(compr, &comprLen, (const Bytef*)(content.data()), contentLength);
if (comprLen > 2 && comprLen < contentLength) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
compr += 2;
content = string((char *)compr, comprLen);
contentLength = comprLen;
} else {
deflated = false;
}
pthread_mutex_unlock(&compressorLock);
}
/* Create the response */
response = MHD_create_response_from_data(contentLength,
(void *)content.data(),
MHD_NO,
MHD_YES);
/* Make a redirection if necessary otherwise send the content */
if (!httpRedirection.empty()) {
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, httpRedirection.c_str());
httpResponseCode = MHD_HTTP_FOUND;
} else {
/* Add if necessary the content-encoding */
if (deflated) {
MHD_add_response_header(response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
/* Tell the client that byte ranges are accepted */
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
/* Specify the mime type */
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());
}
/* clear context pointer */
*ptr = NULL;
/* Force to close the connection - cf. 100% CPU usage with v. 4.4 (in Lucid) */
//MHD_add_response_header(response, MHD_HTTP_HEADER_CONNECTION, "close");
/* Allow cross-domain requests */
//MHD_add_response_header(response, MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
if (cacheEnabled) { /* Force cache */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
} else { /* Prevent cache (for random page) */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "no-cache, no-store, must-revalidate");
response = handle_default(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Queue the response */