From 8d39b0b3433255fc9ef7ef4336e26b5b979b6f5a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 5 Jul 2017 11:28:08 +0200 Subject: [PATCH] Search result objects now have a get_content method. This was not necessary when searching in only one zim file as `url` was enough to get the article (and so the content). If we want to search in several zim files at the same time, we need a way to get the content directly. --- include/searcher.h | 1 + include/xapianSearcher.h | 1 + src/searcher.cpp | 8 ++++++++ src/xapianSearcher.cpp | 22 +++++++++++++++++----- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/include/searcher.h b/include/searcher.h index e5549778..af28e521 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -46,6 +46,7 @@ class Result virtual std::string get_title() = 0; virtual int get_score() = 0; virtual std::string get_snippet() = 0; + virtual std::string get_content() = 0; virtual int get_wordCount() = 0; virtual int get_size() = 0; }; diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index 907ca733..dcbe5647 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -43,6 +43,7 @@ class XapianResult : public Result virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); + virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); diff --git a/src/searcher.cpp b/src/searcher.cpp index 4bfeab35..d83adc7f 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -45,6 +45,7 @@ class _Result : public Result virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); + virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); @@ -241,6 +242,13 @@ std::string _Result::get_snippet() { return iterator.get_snippet(); } +std::string _Result::get_content() +{ + if (iterator->good()) { + return iterator->getData(); + } + return ""; +} int _Result::get_size() { return iterator.get_size(); 
diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index aa0223d9..20688815 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -177,11 +177,10 @@ std::string XapianResult::get_snippet() We parse it and use the html dump to avoid remove html tags in the content and be able to nicely cut the text at random place. */ MyHtmlParser htmlParser; - std::string content; - unsigned int contentLength; - std::string contentType; - searcher->reader->getContentByUrl( - get_url(), content, contentLength, contentType); + std::string content = get_content(); + if (content.empty()) { + return content; + } try { htmlParser.parse_html(content, "UTF-8", true); } catch (...) { @@ -189,6 +188,19 @@ std::string XapianResult::get_snippet() return searcher->results.snippet(htmlParser.dump, 500); } +std::string XapianResult::get_content() +{ + if (!searcher->reader) { + return ""; + } + std::string content; + unsigned int contentLength; + std::string contentType; + searcher->reader->getContentByUrl( + get_url(), content, contentLength, contentType); + return content; +} + int XapianResult::get_size() { if (searcher->valuesmap.empty()) {