From 920d603a893edf3266165dfe2f6b4627d072687f Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 11 Sep 2024 17:46:40 +0400 Subject: [PATCH 1/4] Validation of resource content against cacheid Added a test that checks that the static resources returned by the server have content that matches their cacheid. This test currently fails because some binary resources (e.g. png images) are garbled by the dos2unix conversion. --- test/server.cpp | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/server.cpp b/test/server.cpp index 5c73d2c3..a0ef64e3 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -359,6 +359,57 @@ R"EXPECTEDRESULT( Date: Wed, 11 Sep 2024 17:52:11 +0400 Subject: [PATCH 2/4] Disabled dos2unix conversion for binary resources --- scripts/kiwix-compile-resources | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/kiwix-compile-resources b/scripts/kiwix-compile-resources index fca014e8..7cf90733 100755 --- a/scripts/kiwix-compile-resources +++ b/scripts/kiwix-compile-resources @@ -61,6 +61,12 @@ resource_decl_template = """{namespaces_open} extern const std::string {identifier}; {namespaces_close}""" +BINARY_RESOURCE_EXTENSIONS = ('.png', '.ttf', '.ico') + +def is_binary_resource(filename): + _, extension = os.path.splitext(filename) + return extension in BINARY_RESOURCE_EXTENSIONS + class Resource: def __init__(self, base_dirs, filename, cacheid=None): filename = filename @@ -71,7 +77,9 @@ class Resource: for base_dir in base_dirs: try: with open(os.path.join(base_dir, filename), 'rb') as f: - self.data = f.read().replace(b"\r\n", b"\n") + self.data = f.read() + if not is_binary_resource(filename): + self.data = self.data.replace(b"\r\n", b"\n") found = True break except FileNotFoundError: From 0ac3130b0d5fdea1cb1851e541a7c7b8c878f811 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Thu, 12 Sep 2024 17:18:16 +0400 Subject: [PATCH 3/4] Added an extension to an 
extensionless resource ... so that all resources have extensions and can be automatically categorized as binary or text based on extension (coming next). --- src/server/internalServer.cpp | 2 +- static/resources_list.txt | 2 +- ...url_of_search_results_css => url_of_search_results_css.tmpl} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename static/templates/{url_of_search_results_css => url_of_search_results_css.tmpl} (100%) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index d908a24e..476e1a18 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -959,7 +959,7 @@ std::unique_ptr<Response> InternalServer::handle_search_request(const RequestCon } catch(std::runtime_error& e) { // Searcher->search will throw a runtime error if there is no valid xapian database to do the search. // (in case of zim file not containing a index) - const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); + const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css_tmpl); HTTPErrorResponse response(request, MHD_HTTP_NOT_FOUND, "fulltext-search-unavailable", "404-page-heading", diff --git a/static/resources_list.txt b/static/resources_list.txt index 8e14c92c..faa53cb3 100644 --- a/static/resources_list.txt +++ b/static/resources_list.txt @@ -38,7 +38,7 @@ templates/catalog_v2_entry.xml templates/catalog_v2_partial_entry.xml templates/catalog_v2_categories.xml templates/catalog_v2_languages.xml -templates/url_of_search_results_css +templates/url_of_search_results_css.tmpl templates/viewer_settings.js templates/no_js_library_page.html templates/no_js_download.html diff --git a/static/templates/url_of_search_results_css b/static/templates/url_of_search_results_css.tmpl similarity index 100% rename from static/templates/url_of_search_results_css rename to static/templates/url_of_search_results_css.tmpl From c8524b95bcc6a478ccf4d6d44a9bd88d1152489a Mon Sep 17 00:00:00 2001 From: 
Veloman Yunkan Date: Thu, 12 Sep 2024 17:39:49 +0400 Subject: [PATCH 4/4] Protection against adding resources of new types Now if a static resource of a new type is added the build will fail unless the list of known file type extensions is updated. --- scripts/kiwix-compile-resources | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/scripts/kiwix-compile-resources b/scripts/kiwix-compile-resources index 7cf90733..998a4abe 100755 --- a/scripts/kiwix-compile-resources +++ b/scripts/kiwix-compile-resources @@ -61,11 +61,31 @@ resource_decl_template = """{namespaces_open} extern const std::string {identifier}; {namespaces_close}""" -BINARY_RESOURCE_EXTENSIONS = ('.png', '.ttf', '.ico') +BINARY_RESOURCE_EXTENSIONS = {'.ico', '.png', '.ttf'} + +TEXT_RESOURCE_EXTENSIONS = { + '.css', + '.html', + '.js', + '.json', + '.svg', + '.tmpl', + '.webmanifest', + '.xml', +} + +if not BINARY_RESOURCE_EXTENSIONS.isdisjoint(TEXT_RESOURCE_EXTENSIONS): + raise RuntimeError(f"The following file type extensions are declared to be both binary and text: {BINARY_RESOURCE_EXTENSIONS.intersection(TEXT_RESOURCE_EXTENSIONS)}") def is_binary_resource(filename): _, extension = os.path.splitext(filename) - return extension in BINARY_RESOURCE_EXTENSIONS + is_binary = extension in BINARY_RESOURCE_EXTENSIONS + is_text = extension in TEXT_RESOURCE_EXTENSIONS + if not is_binary and not is_text: + # all file type extensions of static resources must be listed + # in either BINARY_RESOURCE_EXTENSIONS or TEXT_RESOURCE_EXTENSIONS + raise RuntimeError(f"Unknown file type extension: {extension}") + return is_binary class Resource: def __init__(self, base_dirs, filename, cacheid=None):