diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index b44cb2e5..8b27eb22 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -22,6 +22,8 @@ #include #include "searcher.h" +#include "reader.h" + #include #include @@ -58,7 +60,7 @@ namespace kiwix { class XapianSearcher : public Searcher { friend class XapianResult; public: - XapianSearcher(const string &xapianDirectoryPath); + XapianSearcher(const string &xapianDirectoryPath, Reader* reader); virtual ~XapianSearcher() {}; void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, const bool verbose=false); @@ -69,6 +71,7 @@ namespace kiwix { void closeIndex(); void openIndex(const string &xapianDirectoryPath); + Reader* reader; Xapian::Database readableDatabase; Xapian::Stem stemmer; Xapian::MSet results; diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index 4d573e70..88076a9f 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN searcher = NULL; try { if (searcher != NULL) delete searcher; - searcher = new kiwix::XapianSearcher(cPath); + searcher = new kiwix::XapianSearcher(cPath, NULL); } catch (...) 
{ searcher = NULL; retVal = JNI_FALSE; diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index 3f051f27..4de9d483 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -18,6 +18,7 @@ */ #include "xapianSearcher.h" +#include "xapian/myhtmlparse.h" #include #include #include @@ -41,8 +42,9 @@ std::map read_valuesmap(const std::string &s) { } /* Constructor */ - XapianSearcher::XapianSearcher(const string &xapianDirectoryPath) + XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) : Searcher(), + reader(reader), stemmer(Xapian::Stem("english")) { this->openIndex(xapianDirectoryPath); } @@ -134,14 +136,31 @@ std::map read_valuesmap(const std::string &s) { std::string XapianResult::get_snippet() { if ( searcher->valuesmap.empty() ) { - /* This is the old legacy version. Guess and try */ - return document.get_value(1); + /* This is the old legacy version. Guess and try */ + std::string stored_snippet = document.get_value(1); + if ( ! stored_snippet.empty() ) + return stored_snippet; + /* Let's continue here, and see if we can generate one */ } else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() ) { - return document.get_value(searcher->valuesmap["snippet"]); + return document.get_value(searcher->valuesmap["snippet"]); } - return ""; + /* No reader, no snippet */ + if ( ! searcher->reader ) + return ""; + /* Get the content of the article to generate a snippet. + We parse it and use the html dump to avoid html tags in the + content and to be able to cut the text nicely at an arbitrary place. */ + MyHtmlParser htmlParser; + std::string content; + unsigned int contentLength; + std::string contentType; + searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType); + try { + htmlParser.parse_html(content, "UTF-8", true); + } catch (...) {} + return searcher->results.snippet(htmlParser.dump, 500); } int XapianResult::get_size() {