Make kiwix-server multi-threaded.

This commit is contained in:
Matthieu Gautier 2017-09-26 17:32:12 +02:00
parent a2627e81c4
commit 0364951f75

View File

@ -60,11 +60,13 @@ extern "C" {
#include <zim/fileiterator.h> #include <zim/fileiterator.h>
#include <zim/zim.h> #include <zim/zim.h>
#include <zlib.h> #include <zlib.h>
#include <atomic>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <sstream> #include <sstream>
#include <thread>
#include <string> #include <string>
#include <vector> #include <vector>
#include "server-resources.h" #include "server-resources.h"
@ -88,17 +90,14 @@ using namespace std;
static bool noLibraryButtonFlag = false; static bool noLibraryButtonFlag = false;
static bool noSearchBarFlag = false; static bool noSearchBarFlag = false;
static string welcomeHTML; static string welcomeHTML;
static bool verboseFlag = false; static std::atomic_bool isVerbose(false);
static std::map<std::string, std::string> extMimeTypes; static std::map<std::string, std::string> extMimeTypes;
static std::map<std::string, kiwix::Reader*> readers; static std::map<std::string, kiwix::Reader*> readers;
static std::map<std::string, kiwix::Searcher*> searchers; static std::map<std::string, kiwix::Searcher*> searchers;
static kiwix::Searcher* globalSearcher = nullptr; static kiwix::Searcher* globalSearcher = nullptr;
static pthread_mutex_t zimLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t searchLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mapLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t welcomeLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t compressorLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t compressorLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t verboseFlagLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mimeTypeLock = PTHREAD_MUTEX_INITIALIZER;
/* Try to get the mimeType from the file extension */ /* Try to get the mimeType from the file extension */
static std::string getMimeTypeForFile(const std::string& filename) static std::string getMimeTypeForFile(const std::string& filename)
@ -109,14 +108,15 @@ static std::string getMimeTypeForFile(const std::string& filename)
if (pos != std::string::npos) { if (pos != std::string::npos) {
std::string extension = filename.substr(pos + 1); std::string extension = filename.substr(pos + 1);
pthread_mutex_lock(&mimeTypeLock); auto it = extMimeTypes.find(extension);
if (extMimeTypes.find(extension) != extMimeTypes.end()) { if (it != extMimeTypes.end()) {
mimeType = extMimeTypes[extension]; mimeType = it->second;
} else if (extMimeTypes.find(kiwix::lcAll(extension)) } else {
!= extMimeTypes.end()) { it = extMimeTypes.find(kiwix::lcAll(extension));
mimeType = extMimeTypes[kiwix::lcAll(extension)]; if (it != extMimeTypes.end()) {
mimeType = it->second;
}
} }
pthread_mutex_unlock(&mimeTypeLock);
} }
return mimeType; return mimeType;
@ -124,6 +124,7 @@ static std::string getMimeTypeForFile(const std::string& filename)
void introduceTaskbar(string& content, const string& humanReadableBookId) void introduceTaskbar(string& content, const string& humanReadableBookId)
{ {
pthread_mutex_lock(&regexLock);
if (!noSearchBarFlag) { if (!noSearchBarFlag) {
content = appendToFirstOccurence( content = appendToFirstOccurence(
content, content,
@ -149,16 +150,7 @@ void introduceTaskbar(string& content, const string& humanReadableBookId)
"__CONTENT__")); "__CONTENT__"));
} }
} }
} pthread_mutex_unlock(&regexLock);
/* Should display debug information? */
bool isVerbose()
{
bool value;
pthread_mutex_lock(&verboseFlagLock);
value = verboseFlag;
pthread_mutex_unlock(&verboseFlagLock);
return value;
} }
static bool compress_content(string& content, const string& mimeType) static bool compress_content(string& content, const string& mimeType)
@ -264,31 +256,25 @@ ssize_t callback_reader_from_blob(void* cls,
size_t max) size_t max)
{ {
zim::Blob* blob = static_cast<zim::Blob*>(cls); zim::Blob* blob = static_cast<zim::Blob*>(cls);
pthread_mutex_lock(&zimLock);
size_t max_size_to_set = min<size_t>(max, blob->size() - pos); size_t max_size_to_set = min<size_t>(max, blob->size() - pos);
if (max_size_to_set <= 0) { if (max_size_to_set <= 0) {
pthread_mutex_unlock(&zimLock);
return MHD_CONTENT_READER_END_WITH_ERROR; return MHD_CONTENT_READER_END_WITH_ERROR;
} }
memcpy(buf, blob->data() + pos, max_size_to_set); memcpy(buf, blob->data() + pos, max_size_to_set);
pthread_mutex_unlock(&zimLock);
return max_size_to_set; return max_size_to_set;
} }
void callback_free_blob(void* cls) void callback_free_blob(void* cls)
{ {
zim::Blob* blob = static_cast<zim::Blob*>(cls); zim::Blob* blob = static_cast<zim::Blob*>(cls);
pthread_mutex_lock(&zimLock);
delete blob; delete blob;
pthread_mutex_unlock(&zimLock);
} }
static struct MHD_Response* build_callback_response_from_blob( static struct MHD_Response* build_callback_response_from_blob(
zim::Blob& blob, const std::string& mimeType) zim::Blob& blob, const std::string& mimeType)
{ {
pthread_mutex_lock(&zimLock);
zim::Blob* p_blob = new zim::Blob(blob); zim::Blob* p_blob = new zim::Blob(blob);
struct MHD_Response* response struct MHD_Response* response
= MHD_create_response_from_callback(blob.size(), = MHD_create_response_from_callback(blob.size(),
@ -296,7 +282,6 @@ static struct MHD_Response* build_callback_response_from_blob(
callback_reader_from_blob, callback_reader_from_blob,
p_blob, p_blob,
callback_free_blob); callback_free_blob);
pthread_mutex_unlock(&zimLock);
/* Tell the client that byte ranges are accepted */ /* Tell the client that byte ranges are accepted */
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes"); MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
@ -334,11 +319,11 @@ static struct MHD_Response* handle_suggest(
const char* cTerm const char* cTerm
= MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "term"); = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "term");
std::string term = cTerm == NULL ? "" : cTerm; std::string term = cTerm == NULL ? "" : cTerm;
if (isVerbose()) { if (isVerbose.load()) {
std::cout << "Searching suggestions for: \"" << term << "\"" << endl; std::cout << "Searching suggestions for: \"" << term << "\"" << endl;
} }
pthread_mutex_lock(&zimLock); pthread_mutex_lock(&searchLock);
/* Get the suggestions */ /* Get the suggestions */
content = "["; content = "[";
reader->searchSuggestionsSmart(term, maxSuggestionCount); reader->searchSuggestionsSmart(term, maxSuggestionCount);
@ -349,7 +334,7 @@ static struct MHD_Response* handle_suggest(
+ "\"}"; + "\"}";
suggestionCount++; suggestionCount++;
} }
pthread_mutex_unlock(&zimLock); pthread_mutex_unlock(&searchLock);
/* Propose the fulltext search if possible */ /* Propose the fulltext search if possible */
if (searcher != NULL) { if (searcher != NULL) {
@ -406,12 +391,10 @@ static struct MHD_Response* handle_search(
std::vector<std::string> variants = reader->getTitleVariants(patternString); std::vector<std::string> variants = reader->getTitleVariants(patternString);
std::vector<std::string>::iterator variantsItr = variants.begin(); std::vector<std::string>::iterator variantsItr = variants.begin();
pthread_mutex_lock(&zimLock);
while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) { while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) {
reader->getPageUrlFromTitle(*variantsItr, patternCorrespondingUrl); reader->getPageUrlFromTitle(*variantsItr, patternCorrespondingUrl);
variantsItr++; variantsItr++;
} }
pthread_mutex_unlock(&zimLock);
/* If article found then redirect directly to it */ /* If article found then redirect directly to it */
if (!patternCorrespondingUrl.empty()) { if (!patternCorrespondingUrl.empty()) {
@ -432,14 +415,14 @@ static struct MHD_Response* handle_search(
unsigned int endNumber = end != NULL ? atoi(end) : 25; unsigned int endNumber = end != NULL ? atoi(end) : 25;
/* Get the results */ /* Get the results */
pthread_mutex_lock(&zimLock); pthread_mutex_lock(&searchLock);
try { try {
searcher->search(patternString, startNumber, endNumber, isVerbose()); searcher->search(patternString, startNumber, endNumber, isVerbose.load());
content = searcher->getHtml(); content = searcher->getHtml();
} catch (const std::exception& e) { } catch (const std::exception& e) {
std::cerr << e.what() << std::endl; std::cerr << e.what() << std::endl;
} }
pthread_mutex_unlock(&zimLock); pthread_mutex_unlock(&searchLock);
} else { } else {
content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Fulltext search unavailable</title></head><body><h1>Not Found</h1><p>There is no article with the title <b>\"" + kiwix::encodeDiples(patternString) + "\"</b> and the fulltext search engine is not available for this content.</p></body></html>"; content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Fulltext search unavailable</title></head><body><h1>Not Found</h1><p>There is no article with the title <b>\"" + kiwix::encodeDiples(patternString) + "\"</b> and the fulltext search engine is not available for this content.</p></body></html>";
httpResponseCode = MHD_HTTP_NOT_FOUND; httpResponseCode = MHD_HTTP_NOT_FOUND;
@ -470,9 +453,7 @@ static struct MHD_Response* handle_random(
std::string httpRedirection; std::string httpRedirection;
httpResponseCode = MHD_HTTP_FOUND; httpResponseCode = MHD_HTTP_FOUND;
if (reader != NULL) { if (reader != NULL) {
pthread_mutex_lock(&zimLock);
std::string randomUrl = reader->getRandomPageUrl(); std::string randomUrl = reader->getRandomPageUrl();
pthread_mutex_unlock(&zimLock);
httpRedirection httpRedirection
= "/" + humanReadableBookId + "/" + kiwix::urlEncode(randomUrl); = "/" + humanReadableBookId + "/" + kiwix::urlEncode(randomUrl);
} }
@ -494,7 +475,6 @@ static struct MHD_Response* handle_content(
bool found = false; bool found = false;
zim::Article article; zim::Article article;
pthread_mutex_lock(&zimLock);
try { try {
found = reader->getArticleObjectByDecodedUrl(urlStr, article); found = reader->getArticleObjectByDecodedUrl(urlStr, article);
@ -513,10 +493,9 @@ static struct MHD_Response* handle_content(
std::cerr << e.what() << std::endl; std::cerr << e.what() << std::endl;
found = false; found = false;
} }
pthread_mutex_unlock(&zimLock);
if (!found) { if (!found) {
if (isVerbose()) if (isVerbose.load())
cout << "Failed to find " << urlStr << endl; cout << "Failed to find " << urlStr << endl;
content content
@ -535,34 +514,29 @@ static struct MHD_Response* handle_content(
} }
try { try {
pthread_mutex_lock(&zimLock);
mimeType = article.getMimeType(); mimeType = article.getMimeType();
pthread_mutex_unlock(&zimLock);
} catch (exception& e) { } catch (exception& e) {
mimeType = "application/octet-stream"; mimeType = "application/octet-stream";
} }
if (isVerbose()) { if (isVerbose.load()) {
cout << "Found " << urlStr << endl; cout << "Found " << urlStr << endl;
cout << "mimeType: " << mimeType << endl; cout << "mimeType: " << mimeType << endl;
} }
pthread_mutex_lock(&zimLock);
zim::Blob raw_content = article.getData(); zim::Blob raw_content = article.getData();
pthread_mutex_unlock(&zimLock);
if (mimeType.find("text/") != string::npos if (mimeType.find("text/") != string::npos
|| mimeType.find("application/javascript") != string::npos || mimeType.find("application/javascript") != string::npos
|| mimeType.find("application/json") != string::npos) { || mimeType.find("application/json") != string::npos) {
pthread_mutex_lock(&zimLock);
content = string(raw_content.data(), raw_content.size()); content = string(raw_content.data(), raw_content.size());
pthread_mutex_unlock(&zimLock);
/* Special rewrite URL in case of ZIM file use intern *asbolute* url like /* Special rewrite URL in case of ZIM file use intern *asbolute* url like
* /A/Kiwix */ * /A/Kiwix */
if (mimeType.find("text/html") != string::npos) { if (mimeType.find("text/html") != string::npos) {
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" baseUrl = "/" + std::string(1, article.getNamespace()) + "/"
+ article.getUrl(); + article.getUrl();
pthread_mutex_lock(&regexLock);
content = replaceRegex(content, content = replaceRegex(content,
"$1$2" + humanReadableBookId + "/$3/", "$1$2" + humanReadableBookId + "/$3/",
"(href|src)(=[\"|\']{0,1}/)([A-Z|\\-])/"); "(href|src)(=[\"|\']{0,1}/)([A-Z|\\-])/");
@ -573,11 +547,14 @@ static struct MHD_Response* handle_content(
content, content,
"<head><base href=\"/" + humanReadableBookId + baseUrl + "\" />", "<head><base href=\"/" + humanReadableBookId + baseUrl + "\" />",
"<head>"); "<head>");
pthread_mutex_unlock(&regexLock);
introduceTaskbar(content, humanReadableBookId); introduceTaskbar(content, humanReadableBookId);
} else if (mimeType.find("text/css") != string::npos) { } else if (mimeType.find("text/css") != string::npos) {
pthread_mutex_lock(&regexLock);
content = replaceRegex(content, content = replaceRegex(content,
"$1$2" + humanReadableBookId + "/$3/", "$1$2" + humanReadableBookId + "/$3/",
"(url|URL)(\\([\"|\']{0,1}/)([A-Z|\\-])/"); "(url|URL)(\\([\"|\']{0,1}/)([A-Z|\\-])/");
pthread_mutex_unlock(&regexLock);
} }
bool deflated bool deflated
@ -598,9 +575,7 @@ static struct MHD_Response* handle_default(
const std::string& humanReadableBookId, const std::string& humanReadableBookId,
bool acceptEncodingDeflate) bool acceptEncodingDeflate)
{ {
pthread_mutex_lock(&welcomeLock);
std::string content = welcomeHTML; std::string content = welcomeHTML;
pthread_mutex_unlock(&welcomeLock);
std::string mimeType = "text/html; charset=utf-8"; std::string mimeType = "text/html; charset=utf-8";
@ -634,7 +609,7 @@ static int accessHandlerCallback(void* cls,
*ptr = NULL; *ptr = NULL;
/* Debug */ /* Debug */
if (isVerbose()) { if (isVerbose.load()) {
std::cout << "Requesting " << url << std::endl; std::cout << "Requesting " << url << std::endl;
} }
@ -676,7 +651,6 @@ static int accessHandlerCallback(void* cls,
} }
} }
pthread_mutex_lock(&mapLock);
kiwix::Searcher* searcher kiwix::Searcher* searcher
= searchers.find(humanReadableBookId) != searchers.end() = searchers.find(humanReadableBookId) != searchers.end()
? searchers.find(humanReadableBookId)->second ? searchers.find(humanReadableBookId)->second
@ -687,7 +661,6 @@ static int accessHandlerCallback(void* cls,
if (reader == NULL) { if (reader == NULL) {
humanReadableBookId = ""; humanReadableBookId = "";
} }
pthread_mutex_unlock(&mapLock);
/* Get suggestions */ /* Get suggestions */
if (!strcmp(url, "/suggest") && reader != NULL) { if (!strcmp(url, "/suggest") && reader != NULL) {
@ -775,6 +748,7 @@ int main(int argc, char** argv)
int libraryFlag = false; int libraryFlag = false;
string PPIDString; string PPIDString;
unsigned int PPID = 0; unsigned int PPID = 0;
unsigned int nb_threads = std::thread::hardware_concurrency();
kiwix::Manager libraryManager; kiwix::Manager libraryManager;
static struct option long_options[] static struct option long_options[]
@ -787,13 +761,14 @@ int main(int argc, char** argv)
{"attachToProcess", required_argument, 0, 'a'}, {"attachToProcess", required_argument, 0, 'a'},
{"port", required_argument, 0, 'p'}, {"port", required_argument, 0, 'p'},
{"interface", required_argument, 0, 'f'}, {"interface", required_argument, 0, 'f'},
{"threads", required_argument, 0, 't'},
{0, 0, 0, 0}}; {0, 0, 0, 0}};
/* Argument parsing */ /* Argument parsing */
while (true) { while (true) {
int option_index = 0; int option_index = 0;
int c int c
= getopt_long(argc, argv, "mndvli:a:p:f:", long_options, &option_index); = getopt_long(argc, argv, "mndvli:a:p:f:t:", long_options, &option_index);
if (c != -1) { if (c != -1) {
switch (c) { switch (c) {
@ -801,7 +776,7 @@ int main(int argc, char** argv)
daemonFlag = true; daemonFlag = true;
break; break;
case 'v': case 'v':
verboseFlag = true; isVerbose.store(true);
break; break;
case 'l': case 'l':
libraryFlag = true; libraryFlag = true;
@ -825,6 +800,9 @@ int main(int argc, char** argv)
case 'f': case 'f':
interface = string(optarg); interface = string(optarg);
break; break;
case 't':
nb_threads = atoi(optarg);
break;
} }
} else { } else {
if (optind <= argc) { if (optind <= argc) {
@ -843,11 +821,13 @@ int main(int argc, char** argv)
if (zimPathes.empty() && libraryPath.empty()) { if (zimPathes.empty() && libraryPath.empty()) {
cerr << "Usage: kiwix-serve [--index=INDEX_PATH] [--port=PORT] [--verbose] " cerr << "Usage: kiwix-serve [--index=INDEX_PATH] [--port=PORT] [--verbose] "
"[--nosearchbar] [--nolibrarybutton] [--daemon] " "[--nosearchbar] [--nolibrarybutton] [--daemon] "
"[--attachToProcess=PID] [--interface=IF_NAME] ZIM_PATH+" "[--attachToProcess=PID] [--interface=IF_NAME] "
"[--threads=NB_THREAD(" << nb_threads << ")] ZIM_PATH+"
<< endl; << endl;
cerr << " kiwix-serve --library [--port=PORT] [--verbose] [--daemon] " cerr << " kiwix-serve --library [--port=PORT] [--verbose] [--daemon] "
"[--nosearchbar] [--nolibrarybutton] [--attachToProcess=PID] " "[--nosearchbar] [--nolibrarybutton] [--attachToProcess=PID] "
"[--interface=IF_NAME] LIBRARY_PATH" "[--interface=IF_NAME] [--threads=NB_THREAD(" << nb_threads << ")] "
"LIBRARY_PATH"
<< endl; << endl;
cerr << "\n If you set more than one ZIM_PATH, you cannot set a " cerr << "\n If you set more than one ZIM_PATH, you cannot set a "
"INDEX_PATH." "INDEX_PATH."
@ -988,8 +968,10 @@ int main(int argc, char** argv)
welcomeBooksHtml += "" welcomeBooksHtml += ""
"</div>"; "</div>";
pthread_mutex_lock(&regexLock);
welcomeHTML welcomeHTML
= replaceRegex(RESOURCE::home_html_tmpl, welcomeBooksHtml, "__BOOKS__"); = replaceRegex(RESOURCE::home_html_tmpl, welcomeBooksHtml, "__BOOKS__");
pthread_mutex_unlock(&regexLock);
introduceTaskbar(welcomeHTML, ""); introduceTaskbar(welcomeHTML, "");
@ -1013,12 +995,9 @@ int main(int argc, char** argv)
#endif #endif
/* Mutex init */ /* Mutex init */
pthread_mutex_init(&zimLock, NULL); pthread_mutex_init(&searchLock, NULL);
pthread_mutex_init(&mapLock, NULL);
pthread_mutex_init(&welcomeLock, NULL);
pthread_mutex_init(&compressorLock, NULL); pthread_mutex_init(&compressorLock, NULL);
pthread_mutex_init(&verboseFlagLock, NULL); pthread_mutex_init(&regexLock, NULL);
pthread_mutex_init(&mimeTypeLock, NULL);
/* Hard coded mimetypes */ /* Hard coded mimetypes */
extMimeTypes["html"] = "text/html"; extMimeTypes["html"] = "text/html";
@ -1089,14 +1068,14 @@ int main(int argc, char** argv)
exit(1); exit(1);
} }
daemon = MHD_start_daemon(MHD_USE_SELECT_INTERNALLY, daemon = MHD_start_daemon(MHD_USE_POLL_INTERNALLY,
serverPort, serverPort,
NULL, NULL,
NULL, NULL,
&accessHandlerCallback, &accessHandlerCallback,
page, page,
MHD_OPTION_SOCK_ADDR, MHD_OPTION_SOCK_ADDR, &sockAddr,
&sockAddr, MHD_OPTION_THREAD_POOL_SIZE, nb_threads,
MHD_OPTION_END); MHD_OPTION_END);
#else #else
cerr << "Setting 'interface' not yet implemented for Windows" << endl; cerr << "Setting 'interface' not yet implemented for Windows" << endl;
@ -1104,12 +1083,13 @@ int main(int argc, char** argv)
#endif #endif
} else { } else {
daemon = MHD_start_daemon(MHD_USE_SELECT_INTERNALLY, daemon = MHD_start_daemon(MHD_USE_POLL_INTERNALLY,
serverPort, serverPort,
NULL, NULL,
NULL, NULL,
&accessHandlerCallback, &accessHandlerCallback,
page, page,
MHD_OPTION_THREAD_POOL_SIZE, nb_threads,
MHD_OPTION_END); MHD_OPTION_END);
} }
@ -1160,11 +1140,8 @@ int main(int argc, char** argv)
MHD_stop_daemon(daemon); MHD_stop_daemon(daemon);
/* Mutex destroy */ /* Mutex destroy */
pthread_mutex_destroy(&zimLock); pthread_mutex_destroy(&searchLock);
pthread_mutex_destroy(&compressorLock); pthread_mutex_destroy(&compressorLock);
pthread_mutex_destroy(&mapLock); pthread_mutex_destroy(&regexLock);
pthread_mutex_destroy(&welcomeLock);
pthread_mutex_destroy(&verboseFlagLock);
pthread_mutex_destroy(&mimeTypeLock);
exit(0); exit(0);
} }