mirror of
https://github.com/kiwix/kiwix-tools.git
synced 2025-09-22 19:38:53 -04:00
+ introduction of the search engine
This commit is contained in:
parent
9ac478aca7
commit
474c4e9a7a
@ -16,6 +16,7 @@
|
|||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
#include <kiwix/reader.h>
|
#include <kiwix/reader.h>
|
||||||
|
#include <kiwix/searcher.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -33,6 +34,8 @@ static const string HTMLScripts = " \
|
|||||||
margin-right: 5px; \n \
|
margin-right: 5px; \n \
|
||||||
padding: 5px; \n \
|
padding: 5px; \n \
|
||||||
font-weight: bold; \n \
|
font-weight: bold; \n \
|
||||||
|
font-size: 14px; \n \
|
||||||
|
height: min; \n \
|
||||||
background: #FFFFFF; \n \
|
background: #FFFFFF; \n \
|
||||||
visibility: hidden; \n \
|
visibility: hidden; \n \
|
||||||
z-index: 100; \n \
|
z-index: 100; \n \
|
||||||
@ -99,13 +102,14 @@ else if (document.getElementById) \n \
|
|||||||
";
|
";
|
||||||
|
|
||||||
static const string HTMLDiv = " \
|
static const string HTMLDiv = " \
|
||||||
<div id=\"topbar\"> \n \
|
<div id=\"topbar\">Search<form method=\"GET\" action=\"/search\"><input type=\"textbox\" name=\"pattern\" /><input type=\"submit\" value=\"Go\" /></form></div> \n \
|
||||||
Search <input type=\"textbox\" />\n \
|
|
||||||
</div> \n \
|
|
||||||
";
|
";
|
||||||
|
|
||||||
static kiwix::Reader* reader;
|
static kiwix::Reader* reader;
|
||||||
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
static kiwix::Searcher* searcher;
|
||||||
|
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static bool hasSearchIndex = false;
|
||||||
|
|
||||||
static void appendToFirstOccurence(string &content, const string regex, const string &replacement) {
|
static void appendToFirstOccurence(string &content, const string regex, const string &replacement) {
|
||||||
regmatch_t matchs[1];
|
regmatch_t matchs[1];
|
||||||
@ -140,34 +144,62 @@ static int accessHandlerCallback(void *cls,
|
|||||||
return MHD_YES;
|
return MHD_YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prepare the variable */
|
/* Prepare the variables */
|
||||||
struct MHD_Response *response;
|
struct MHD_Response *response;
|
||||||
string content = "";
|
string content = "";
|
||||||
string mimeType = "";
|
string mimeType = "";
|
||||||
unsigned int contentLength = 0;
|
unsigned int contentLength = 0;
|
||||||
|
|
||||||
/* Mutex lock */
|
if (!strcmp(url, "/search")) {
|
||||||
pthread_mutex_lock(&lock);
|
const char* pattern = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "pattern");
|
||||||
|
std::string urlStr;
|
||||||
|
std::string titleStr;
|
||||||
|
unsigned int scoreInt;
|
||||||
|
char scoreStr[4];
|
||||||
|
|
||||||
|
/* Mutex lock */
|
||||||
|
pthread_mutex_lock(&searcherLock);
|
||||||
|
|
||||||
|
searcher->search(pattern, 30);
|
||||||
|
content = "<html><head><title>Kiwix search results</title></head><body><h1>Results</h1><hr/><ol>\n";
|
||||||
|
while (searcher->getNextResult(urlStr, titleStr, scoreInt)) {
|
||||||
|
sprintf(scoreStr, "%d", scoreInt);
|
||||||
|
content += "<li><a href=\"" + urlStr + "\">" + titleStr+ "</a> (" + scoreStr + "%)</li>\n";
|
||||||
|
|
||||||
/* Load the article from the ZIM file */
|
|
||||||
cout << "Loading '" << url << "'... " << endl;
|
|
||||||
try {
|
|
||||||
reader->getContent(url, content, contentLength, mimeType);
|
|
||||||
cout << "content size: " << contentLength << endl;
|
|
||||||
cout << "mimeType: " << mimeType << endl;
|
|
||||||
|
|
||||||
/* Rewrite the content */
|
|
||||||
if (mimeType == "text/html") {
|
|
||||||
appendToFirstOccurence(content, "<head>", HTMLScripts);
|
|
||||||
appendToFirstOccurence(content, "<body[^>]*>", HTMLDiv);
|
|
||||||
contentLength = content.size();
|
|
||||||
}
|
}
|
||||||
} catch (const std::exception& e) {
|
content += "</ol></body></html>\n";
|
||||||
std::cerr << e.what() << std::endl;
|
|
||||||
|
mimeType ="text/html";
|
||||||
|
contentLength = content.size();
|
||||||
|
|
||||||
|
/* Mutex unlock */
|
||||||
|
pthread_mutex_unlock(&searcherLock);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
/* Mutex Lock */
|
||||||
|
pthread_mutex_lock(&readerLock);
|
||||||
|
|
||||||
|
/* Load the article from the ZIM file */
|
||||||
|
cout << "Loading '" << url << "'... " << endl;
|
||||||
|
try {
|
||||||
|
reader->getContent(url, content, contentLength, mimeType);
|
||||||
|
cout << "content size: " << contentLength << endl;
|
||||||
|
cout << "mimeType: " << mimeType << endl;
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
std::cerr << e.what() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mutex unlock */
|
||||||
|
pthread_mutex_unlock(&readerLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Mutex unlock */
|
/* Rewrite the content (add the search box) */
|
||||||
pthread_mutex_unlock(&lock);
|
if (mimeType == "text/html") {
|
||||||
|
appendToFirstOccurence(content, "<head>", HTMLScripts);
|
||||||
|
appendToFirstOccurence(content, "<body[^>]*>", HTMLDiv);
|
||||||
|
contentLength = content.size();
|
||||||
|
}
|
||||||
|
|
||||||
/* clear context pointer */
|
/* clear context pointer */
|
||||||
*ptr = NULL;
|
*ptr = NULL;
|
||||||
@ -201,8 +233,35 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
string zimPath = (argv[1]);
|
string zimPath = (argv[1]);
|
||||||
int port = atoi(argv[2]);
|
int port = atoi(argv[2]);
|
||||||
|
string indexPath = (argc>3 ? argv[3] : "");
|
||||||
|
|
||||||
void *page;
|
void *page;
|
||||||
|
|
||||||
|
/* Instanciate the ZIM file handler */
|
||||||
|
try {
|
||||||
|
reader = new kiwix::Reader(zimPath);
|
||||||
|
} catch (...) {
|
||||||
|
cout << "Unable to open the ZIM file '" << zimPath << "'." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Instanciate the ZIM index (if necessary) */
|
||||||
|
if (indexPath != "") {
|
||||||
|
try {
|
||||||
|
searcher = new kiwix::Searcher(indexPath);
|
||||||
|
hasSearchIndex = true;
|
||||||
|
} catch (...) {
|
||||||
|
cout << "Unable to open the search index '" << zimPath << "'." << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
hasSearchIndex = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mutex init */
|
||||||
|
pthread_mutex_init(&readerLock, NULL);
|
||||||
|
pthread_mutex_init(&searcherLock, NULL);
|
||||||
|
|
||||||
/* Start the HTTP daemon */
|
/* Start the HTTP daemon */
|
||||||
daemon = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION,
|
daemon = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION,
|
||||||
port,
|
port,
|
||||||
@ -217,17 +276,6 @@ int main(int argc, char **argv) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Instanciate the ZIM file handler */
|
|
||||||
try {
|
|
||||||
reader = new kiwix::Reader(zimPath);
|
|
||||||
} catch (...) {
|
|
||||||
cout << "Unable to open the ZIM file '" << zimPath << "'." << endl;
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Mutex init */
|
|
||||||
pthread_mutex_init(&lock, NULL);
|
|
||||||
|
|
||||||
/* Run endless */
|
/* Run endless */
|
||||||
while (42) sleep(1);
|
while (42) sleep(1);
|
||||||
|
|
||||||
@ -235,7 +283,8 @@ int main(int argc, char **argv) {
|
|||||||
MHD_stop_daemon(daemon);
|
MHD_stop_daemon(daemon);
|
||||||
|
|
||||||
/* Mutex destroy */
|
/* Mutex destroy */
|
||||||
pthread_mutex_destroy(&lock);
|
pthread_mutex_destroy(&readerLock);
|
||||||
|
pthread_mutex_destroy(&searcherLock);
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user