// Review notes for the patch below (a unified diff; text before the first
// "diff --git" header is commentary only).
//
// What the visible hunks do:
//  * src/server/i18n.cpp
//    - I18nStringDB::getStringCount(lang): returns entryCount of the table
//      registered for `lang`, or 0 when lang2TableMap has no such key.
//    - getStringDb(): accessor around the function-local static I18nStringDB
//      that previously lived inside getTranslatedString().
//    - parseSingleLanguagePreference(s): skips leading " \t\n"; without ';'
//      the rest of the string is the language with preference 1; with ';'
//      the remainder must be exactly "q=<float>" up to the end of the string,
//      otherwise {"", 0} is returned.
//    - parseUserLanguagePreferences(s): splits on ',' and keeps only entries
//      with a non-empty language and a preference > 0.
//    - selectMostSuitableLanguage(prefs): "en" for an empty list; otherwise
//      the entry with the strictly greatest preference * getStringCount(lang),
//      starting from "en" with score 0.
//  * src/server/i18n.h: declares LangPreference, UserLangPreferences and the
//    two public functions above.
//  * src/server/request_context.{h,cpp}: collects request cookies through
//    fill_cookie() and caches the result of determine_user_language() in
//    `userlang`. Lookup order: "userlang" query argument, "userlang" cookie,
//    parsed Accept-Language header, then "en".
//  * src/server/response.cpp: Response::send() adds a
//    "Set-Cookie: userlang=<request.get_user_language()>" header; no
//    condition around it is visible in this hunk.
//  * test/otherTools.cpp, test/server.cpp: unit test for
//    parseUserLanguagePreferences() and cookie-aware UserLanguageControl cases.
//
// NOTE(review): in the last UserLanguageControl case the description string
//   still reads "The value of the Accept-Language header is not currently
//   parsed." (and the following comment says the default (en) language is
//   used), while the expected result is now "userlang=test" - the wording
//   looks stale; confirm and update in the real source.
// NOTE(review): parseSingleLanguagePreference() compares
//   langEnd + 1 + nCharsScanned (size_t + int) with s.size(); mixed
//   signedness - confirm this is warning-free with the project's flags.
// NOTE(review): the Set-Cookie value is built from request-supplied input
//   (query argument / cookie) with no validation visible here - confirm that
//   this is acceptable.
// NOTE(review): this copy of the patch appears to have lost every <...> span
//   (e.g. "typedef std::vector UserLangPreferences;", "static_cast(__this)",
//   bare "#include", the tag after "expected") and its original line breaks;
//   verify against the original patch before applying.
diff --git a/src/server/i18n.cpp b/src/server/i18n.cpp index 2aecc724..0a2cd8c7 100644 --- a/src/server/i18n.cpp +++ b/src/server/i18n.cpp @@ -70,6 +70,14 @@ public: // functions return s; } + size_t getStringCount(const std::string& lang) const { + try { + return lang2TableMap.at(lang)->entryCount; + } catch(const std::out_of_range&) { + return 0; + } + } + private: // functions const I18nStringTable* getStringsFor(const std::string& lang) const { try { @@ -84,13 +92,17 @@ private: // data const I18nStringTable* enStrings; }; +const I18nStringDB& getStringDb() +{ + static const I18nStringDB stringDb; + return stringDb; +} + } // unnamed namespace std::string getTranslatedString(const std::string& lang, const std::string& key) { - static const I18nStringDB stringDb; - - return stringDb.get(lang, key); + return getStringDb().get(lang, key); } namespace i18n @@ -111,4 +123,70 @@ std::string ParameterizedMessage::getText(const std::string& lang) const return i18n::expandParameterizedString(lang, msgId, params); } +namespace +{ + +LangPreference parseSingleLanguagePreference(const std::string& s) +{ + const size_t langStart = s.find_first_not_of(" \t\n"); + if ( langStart == std::string::npos ) { + return {"", 0}; + } + + const size_t langEnd = s.find(';', langStart); + if ( langEnd == std::string::npos ) { + return {s.substr(langStart), 1}; + } + + const std::string lang = s.substr(langStart, langEnd - langStart); + // We don't care about langEnd == langStart which will result in an empty + // language name - it will be dismissed by parseUserLanguagePreferences() + + float q = 1.0; + int nCharsScanned; + if ( 1 == sscanf(s.c_str() + langEnd + 1, "q=%f%n", &q, &nCharsScanned) + && langEnd + 1 + nCharsScanned == s.size() ) { + return {lang, q}; + } + + return {"", 0}; +} + +} // unnamed namespace + +UserLangPreferences parseUserLanguagePreferences(const std::string& s) +{ + UserLangPreferences result; + std::istringstream iss(s); + std::string singleLangPrefStr; + while 
( std::getline(iss, singleLangPrefStr, ',') ) + { + const auto langPref = parseSingleLanguagePreference(singleLangPrefStr); + if ( !langPref.lang.empty() && langPref.preference > 0 ) { + result.push_back(langPref); + } + } + + return result; +} + +std::string selectMostSuitableLanguage(const UserLangPreferences& prefs) +{ + if ( prefs.empty() ) { + return "en"; + } + + std::string bestLangSoFar("en"); + float bestScoreSoFar = 0; + const auto& stringDb = getStringDb(); + for ( const auto& entry : prefs ) { + const float score = entry.preference * stringDb.getStringCount(entry.lang); + if ( score > bestScoreSoFar ) { + bestScoreSoFar = score; + bestLangSoFar = entry.lang; + } + } + return bestLangSoFar; +} + } // namespace kiwix diff --git a/src/server/i18n.h b/src/server/i18n.h index d4b084d3..23236074 100644 --- a/src/server/i18n.h +++ b/src/server/i18n.h @@ -89,6 +89,18 @@ private: // data const Parameters params; }; +struct LangPreference +{ + const std::string lang; + const float preference; +}; + +typedef std::vector UserLangPreferences; + +UserLangPreferences parseUserLanguagePreferences(const std::string& s); + +std::string selectMostSuitableLanguage(const UserLangPreferences& prefs); + } // namespace kiwix #endif // KIWIX_SERVER_I18N diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 5e191afa..4eac4ad3 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -25,8 +25,10 @@ #include #include #include +#include #include "tools/stringTools.h" +#include "i18n.h" namespace kiwix { @@ -80,6 +82,7 @@ RequestContext::RequestContext(struct MHD_Connection* connection, { MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this); MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this); + MHD_get_connection_values(connection, MHD_COOKIE_KIND, &RequestContext::fill_cookie, this); try { acceptEncodingGzip = @@ -89,6 +92,8 @@ 
RequestContext::RequestContext(struct MHD_Connection* connection, try { byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE)); } catch (const std::out_of_range&) {} + + userlang = determine_user_language(); } RequestContext::~RequestContext() @@ -118,6 +123,14 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind, return MHD_YES; } +MHD_Result RequestContext::fill_cookie(void *__this, enum MHD_ValueKind kind, + const char *key, const char* value) +{ + RequestContext *_this = static_cast(__this); + _this->cookies[key] = value == nullptr ? "" : value; + return MHD_YES; +} + void RequestContext::print_debug_info() const { printf("method : %s (%d)\n", method==RequestMethod::GET ? "GET" : method==RequestMethod::POST ? "POST" : @@ -198,13 +211,24 @@ std::string RequestContext::get_header(const std::string& name) const { } std::string RequestContext::get_user_language() const +{ + return userlang; +} + +std::string RequestContext::determine_user_language() const { try { return get_argument("userlang"); } catch(const std::out_of_range&) {} try { - return get_header("Accept-Language"); + return cookies.at("userlang"); + } catch(const std::out_of_range&) {} + + try { + const std::string acceptLanguage = get_header("Accept-Language"); + const auto userLangPrefs = parseUserLanguagePreferences(acceptLanguage); + return selectMostSuitableLanguage(userLangPrefs); } catch(const std::out_of_range&) {} return "en"; diff --git a/src/server/request_context.h b/src/server/request_context.h index de02d465..07339324 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -130,10 +130,15 @@ class RequestContext { ByteRange byteRange_; std::map headers; std::map> arguments; + std::map cookies; std::string queryString; + std::string userlang; private: // functions + std::string determine_user_language() const; + static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*); + static MHD_Result 
fill_cookie(void *, enum MHD_ValueKind, const char*, const char*); static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*); }; diff --git a/src/server/response.cpp b/src/server/response.cpp index 6020f9f7..c0cd8bd5 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -387,6 +387,9 @@ MHD_Result Response::send(const RequestContext& request, MHD_Connection* connect MHD_add_response_header(response, p.first.c_str(), p.second.c_str()); } + const std::string cookie = "userlang=" + request.get_user_language(); + MHD_add_response_header(response, MHD_HTTP_HEADER_SET_COOKIE, cookie.c_str()); + if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT) m_returnCode = MHD_HTTP_PARTIAL_CONTENT; diff --git a/test/otherTools.cpp b/test/otherTools.cpp index 221c2ac3..9b6ce1fa 100644 --- a/test/otherTools.cpp +++ b/test/otherTools.cpp @@ -20,6 +20,7 @@ #include "gtest/gtest.h" #include "../src/tools/otherTools.h" #include "zim/suggestion_iterator.h" +#include "../src/server/i18n.h" #include @@ -172,3 +173,63 @@ R"EXPECTEDJSON([ )EXPECTEDJSON" ); } + +std::string toString(const kiwix::LangPreference& x) +{ + std::ostringstream oss; + oss << "{" << x.lang << ", " << x.preference << "}"; + return oss.str(); +} + +std::string toString(const kiwix::UserLangPreferences& prefs) { + std::ostringstream oss; + for ( const auto& x : prefs ) + oss << toString(x); + return oss.str(); +} + +TEST(I18n, parseUserLanguagePreferences) +{ + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("*")), + "{*, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr")), + "{fr, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr-CH")), + "{fr-CH, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr, en-US")), + "{fr, 1}{en-US, 1}" + ); + 
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=0.5")), + "{ru, 0.5}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr-CH,ru;q=0.5")), + "{fr-CH, 1}{ru, 0.5}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=0.5, *;q=0.1")), + "{ru, 0.5}{*, 0.1}" + ); + + // rejected input + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;0.8")), + "" + ); + + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr,ru;0.8,en;q=0.5")), + "{fr, 1}{en, 0.5}" + ); +} diff --git a/test/server.cpp b/test/server.cpp index d110707c..2c6156c2 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -976,58 +976,145 @@ TEST_F(ServerTest, UserLanguageControl) { struct TestData { + const std::string description; const std::string url; const std::string acceptLanguageHeader; + const char* const requestCookie; // Cookie: header of the request + const char* const responseSetCookie; // Set-Cookie: header of the response const std::string expectedH1; operator TestContext() const { - return TestContext{ + TestContext ctx{ + {"description", description}, {"url", url}, {"acceptLanguageHeader", acceptLanguageHeader}, }; + + if ( requestCookie ) { + ctx.push_back({"requestCookie", requestCookie}); + } + + return ctx; } }; + const char* const NO_COOKIE = nullptr; + const TestData testData[] = { { + "Default user language is English", /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { + "userlang URL query parameter is respected", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { + "userlang URL query parameter is respected", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=test", /*Accept-Language:*/ "", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=test", /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { + "'Accept-Language: *' is handled", /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "*", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { + "Accept-Language: header is respected", /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=test", /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { - // userlang query parameter takes precedence over Accept-Language + "userlang cookie is respected", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "userlang=test", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang cookie is correctly parsed", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "anothercookie=123; userlang=test", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang cookie is correctly parsed", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "userlang=test; anothercookie=abc", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang cookie is correctly parsed", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "cookie1=abc; userlang=test; cookie2=xyz", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "Multiple userlang cookies are not a problem", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "cookie1=abc; userlang=en; userlang=test; cookie2=xyz", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang query parameter takes precedence over Accept-Language", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "test", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { - // The value of the Accept-Language header is not currently parsed. + "userlang query parameter takes precedence over its cookie counterpart", + /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "userlang=test", + /*Response Set-Cookie:*/ "userlang=en", + /* expected

*/ "Not Found" + }, + { + "userlang in cookies takes precedence over Accept-Language", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "test", + /*Request Cookie:*/ "userlang=en", + /*Response Set-Cookie:*/ "userlang=en", + /* expected

*/ "Not Found" + }, + { + "The value of the Accept-Language header is not currently parsed.", // In case of a comma separated list of languages (optionally weighted // with quality values) the default (en) language is used instead. /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test;q=0.9, en;q=0.2", - /* expected

*/ "Not Found" + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, }; @@ -1038,7 +1125,15 @@ TEST_F(ServerTest, UserLanguageControl) if ( !t.acceptLanguageHeader.empty() ) { headers.insert({"Accept-Language", t.acceptLanguageHeader}); } + if ( t.requestCookie ) { + headers.insert({"Cookie", t.requestCookie}); + } const auto r = zfs1_->GET(t.url.c_str(), headers); + if ( t.responseSetCookie ) { + EXPECT_EQ(t.responseSetCookie, getHeaderValue(r->headers, "Set-Cookie")) << t; + } else { + EXPECT_FALSE(r->has_header("Set-Cookie")); + } std::regex_search(r->body, h1Match, h1Regex); const std::string h1(h1Match[1]); EXPECT_EQ(h1, t.expectedH1) << t;