Merge pull request #1137 from kiwix/catalog_search_without_boolean_operators

Catalog search without support for Xapian boolean operators
This commit is contained in:
Kelson 2024-09-25 15:16:53 +00:00 committed by GitHub
commit ea31e2f42f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 176 additions and 17 deletions

View File

@ -645,8 +645,6 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
//queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???))); //queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???)));
//queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); //queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
const auto flags = Xapian::QueryParser::FLAG_PHRASE const auto flags = Xapian::QueryParser::FLAG_PHRASE
| Xapian::QueryParser::FLAG_BOOLEAN
| Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
| Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_LOVEHATE
| Xapian::QueryParser::FLAG_WILDCARD | Xapian::QueryParser::FLAG_WILDCARD
| partialQueryFlag; | partialQueryFlag;

View File

@ -4,7 +4,7 @@
path="./zimfile_raycharles.zim" path="./zimfile_raycharles.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles.zim"
title="Ray Charles" title="Ray Charles"
description="Wikipedia articles about Ray Charles" description="Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)"
language="eng" language="eng"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"
@ -22,7 +22,7 @@
path="./zimfile_raycharles_uncategorized.zim" path="./zimfile_raycharles_uncategorized.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim"
title="Ray (uncategorized) Charles" title="Ray (uncategorized) Charles"
description="No category is assigned to this library entry." description="No category is assigned to this library entry (neither adj nor xor was considered a good option)"
language="rus,eng" language="rus,eng"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"
@ -39,7 +39,7 @@
path="./zimfile&other.zim" path="./zimfile&other.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile%26other.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile%26other.zim"
title="Charles, Ray" title="Charles, Ray"
description="Wikipedia articles about Ray Charles" description="Wikipedia articles about Ray Charles or why and when one should go to library"
language="fra" language="fra"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"

View File

@ -103,7 +103,7 @@ std::string maskVariableOPDSFeedData(std::string s)
#define _CHARLES_RAY_CATALOG_ENTRY(CONTENT_NAME) CATALOG_ENTRY( \ #define _CHARLES_RAY_CATALOG_ENTRY(CONTENT_NAME) CATALOG_ENTRY( \
"charlesray", \ "charlesray", \
"Charles, Ray", \ "Charles, Ray", \
"Wikipedia articles about Ray Charles", \ "Wikipedia articles about Ray Charles or why and when one should go to library", \
"fra", \ "fra", \
"wikipedia_fr_ray_charles",\ "wikipedia_fr_ray_charles",\
"jazz",\ "jazz",\
@ -120,7 +120,7 @@ std::string maskVariableOPDSFeedData(std::string s)
#define _RAY_CHARLES_CATALOG_ENTRY(CONTENT_NAME) CATALOG_ENTRY(\ #define _RAY_CHARLES_CATALOG_ENTRY(CONTENT_NAME) CATALOG_ENTRY(\
"raycharles",\ "raycharles",\
"Ray Charles",\ "Ray Charles",\
"Wikipedia articles about Ray Charles",\ "Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)",\
"eng",\ "eng",\
"wikipedia_en_ray_charles",\ "wikipedia_en_ray_charles",\
"wikipedia",\ "wikipedia",\
@ -139,7 +139,7 @@ std::string maskVariableOPDSFeedData(std::string s)
#define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\ #define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\
"raycharles_uncategorized",\ "raycharles_uncategorized",\
"Ray (uncategorized) Charles",\ "Ray (uncategorized) Charles",\
"No category is assigned to this library entry.",\ "No category is assigned to this library entry (neither adj nor xor was considered a good option)",\
"rus,eng",\ "rus,eng",\
"wikipedia_ru_ray_charles",\ "wikipedia_ru_ray_charles",\
"",\ "",\
@ -199,8 +199,8 @@ TEST_F(LibraryServerTest, catalog_search_by_phrase)
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -218,8 +218,8 @@ TEST_F(LibraryServerTest, catalog_search_by_words)
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>3</itemsPerPage>\n" " <itemsPerPage>3</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
@ -239,8 +239,8 @@ TEST_F(LibraryServerTest, catalog_prefix_search)
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -275,8 +275,8 @@ TEST_F(LibraryServerTest, catalog_search_with_word_exclusion)
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -331,8 +331,8 @@ TEST_F(LibraryServerTest, catalog_search_by_category)
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -772,10 +772,171 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_search_terms)
" <totalResults>2</totalResults>\n" " <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY RAY_CHARLES_CATALOG_ENTRY
"</feed>\n"
);
}
TEST_F(LibraryServerTest, catalog_v2_entries_filtering_special_queries)
{
{
// 'or' doesn't act as a Xapian boolean operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=Or");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=Or")
" <title>Filtered Entries (q=Or)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
}
{
// 'and' doesn't act as a Xapian boolean operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=and");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=and")
" <title>Filtered Entries (q=and)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
CHARLES_RAY_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'not' doesn't act as a Xapian boolean operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=not");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=not")
" <title>Filtered Entries (q=not)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'xor' doesn't act as a Xapian boolean operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=xor");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=xor")
" <title>Filtered Entries (q=xor)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'or' doesn't act as a Xapian boolean operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=wikipedia%20or%20library");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=wikipedia%20or%20library")
" <title>Filtered Entries (q=wikipedia%20or%20library)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
CHARLES_RAY_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'and' doesn't act as a Xapian boolean operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=wikipedia%20and%20articles");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=wikipedia%20and%20articles")
" <title>Filtered Entries (q=wikipedia%20and%20articles)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
CHARLES_RAY_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'near' doesn't act as a Xapian query operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=near");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=near")
" <title>Filtered Entries (q=near)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'adj' doesn't act as a Xapian query operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=adj");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=adj")
" <title>Filtered Entries (q=adj)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n"
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
"</feed>\n"
);
}
{
// 'near' doesn't act as a Xapian query operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=charles%20near%20why");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=charles%20near%20why")
" <title>Filtered Entries (q=charles%20near%20why)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>0</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>0</itemsPerPage>\n"
"</feed>\n"
);
}
{
// 'adj' doesn't act as a Xapian query operator
const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=charles%20adj%20why");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=charles%20adj%20why")
" <title>Filtered Entries (q=charles%20adj%20why)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>0</totalResults>\n"
" <startIndex>0</startIndex>\n"
" <itemsPerPage>0</itemsPerPage>\n"
"</feed>\n"
);
}
} }
TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language) TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language)
@ -841,8 +1002,8 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_category)
" <totalResults>2</totalResults>\n" " <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -1086,7 +1247,7 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__header\">\n" \ " <div class=\"book__header\">\n" \
" <div id=\"book__title\">Charles, Ray</div>\n" \ " <div id=\"book__title\">Charles, Ray</div>\n" \
" </div>\n" \ " </div>\n" \
" <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles\">Wikipedia articles about Ray Charles</div>\n" \ " <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles or why and when one should go to library\">Wikipedia articles about Ray Charles or why and when one should go to library</div>\n" \
" </div>\n" \ " </div>\n" \
" </a>\n" \ " </a>\n" \
" <div class=\"book__meta\">\n" \ " <div class=\"book__meta\">\n" \
@ -1113,7 +1274,7 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__header\">\n" \ " <div class=\"book__header\">\n" \
" <div id=\"book__title\">Ray Charles</div>\n" \ " <div id=\"book__title\">Ray Charles</div>\n" \
" </div>\n" \ " </div>\n" \
" <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles\">Wikipedia articles about Ray Charles</div>\n" \ " <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)\">Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)</div>\n" \
" </div>\n" \ " </div>\n" \
" </a>\n" \ " </a>\n" \
" <div class=\"book__meta\">\n" \ " <div class=\"book__meta\">\n" \
@ -1140,7 +1301,7 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__header\">\n" \ " <div class=\"book__header\">\n" \
" <div id=\"book__title\">Ray (uncategorized) Charles</div>\n" \ " <div id=\"book__title\">Ray (uncategorized) Charles</div>\n" \
" </div>\n" \ " </div>\n" \
" <div class=\"book__description\" title=\"No category is assigned to this library entry.\">No category is assigned to this library entry.</div>\n" \ " <div class=\"book__description\" title=\"No category is assigned to this library entry (neither adj nor xor was considered a good option)\">No category is assigned to this library entry (neither adj nor xor was considered a good option)</div>\n" \
" </div>\n" \ " </div>\n" \
" </a>\n" \ " </a>\n" \
" <div class=\"book__meta\">\n" \ " <div class=\"book__meta\">\n" \