Merge pull request #1131 from kiwix/ungarbled_binary_resources

Ungarbled binary resources
This commit is contained in:
Kelson 2024-09-14 14:43:23 +02:00 committed by GitHub
commit 327fec1877
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 82 additions and 3 deletions

View File

@ -61,6 +61,32 @@ resource_decl_template = """{namespaces_open}
extern const std::string {identifier}; extern const std::string {identifier};
{namespaces_close}""" {namespaces_close}"""
# File type extensions of static resources whose bytes must be kept
# exactly as they are on disk.
BINARY_RESOURCE_EXTENSIONS = {
    '.ico',
    '.png',
    '.ttf',
}

# File type extensions of static resources that are treated as text.
TEXT_RESOURCE_EXTENSIONS = {
    '.css',
    '.html',
    '.js',
    '.json',
    '.svg',
    '.tmpl',
    '.webmanifest',
    '.xml',
}

# Sanity check performed at module load: an extension listed in both sets
# would make the binary/text classification ambiguous.
if BINARY_RESOURCE_EXTENSIONS & TEXT_RESOURCE_EXTENSIONS:
    raise RuntimeError(f"The following file type extensions are declared to be both binary and text: {BINARY_RESOURCE_EXTENSIONS.intersection(TEXT_RESOURCE_EXTENSIONS)}")
def is_binary_resource(filename):
    """Tell whether `filename` names a binary (as opposed to text) resource.

    The decision is based solely on the file type extension of `filename`.

    Returns True if the extension is listed in BINARY_RESOURCE_EXTENSIONS
    and False if it is listed in TEXT_RESOURCE_EXTENSIONS.

    Raises RuntimeError if the extension is listed in neither set.
    """
    extension = os.path.splitext(filename)[1]
    if extension in BINARY_RESOURCE_EXTENSIONS:
        return True
    if extension in TEXT_RESOURCE_EXTENSIONS:
        return False
    # Every static resource's file type extension has to be registered in
    # either BINARY_RESOURCE_EXTENSIONS or TEXT_RESOURCE_EXTENSIONS.
    raise RuntimeError(f"Unknown file type extension: {extension}")
class Resource: class Resource:
def __init__(self, base_dirs, filename, cacheid=None): def __init__(self, base_dirs, filename, cacheid=None):
filename = filename filename = filename
@ -71,7 +97,9 @@ class Resource:
for base_dir in base_dirs: for base_dir in base_dirs:
try: try:
with open(os.path.join(base_dir, filename), 'rb') as f: with open(os.path.join(base_dir, filename), 'rb') as f:
self.data = f.read().replace(b"\r\n", b"\n") self.data = f.read()
if not is_binary_resource(filename):
self.data = self.data.replace(b"\r\n", b"\n")
found = True found = True
break break
except FileNotFoundError: except FileNotFoundError:

View File

@ -959,7 +959,7 @@ std::unique_ptr<Response> InternalServer::handle_search_request(const RequestCon
} catch(std::runtime_error& e) { } catch(std::runtime_error& e) {
// Searcher->search will throw a runtime error if there is no valid xapian database to do the search. // Searcher->search will throw a runtime error if there is no valid xapian database to do the search.
// (in case of zim file not containing a index) // (in case of zim file not containing a index)
const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css_tmpl);
HTTPErrorResponse response(request, MHD_HTTP_NOT_FOUND, HTTPErrorResponse response(request, MHD_HTTP_NOT_FOUND,
"fulltext-search-unavailable", "fulltext-search-unavailable",
"404-page-heading", "404-page-heading",

View File

@ -38,7 +38,7 @@ templates/catalog_v2_entry.xml
templates/catalog_v2_partial_entry.xml templates/catalog_v2_partial_entry.xml
templates/catalog_v2_categories.xml templates/catalog_v2_categories.xml
templates/catalog_v2_languages.xml templates/catalog_v2_languages.xml
templates/url_of_search_results_css templates/url_of_search_results_css.tmpl
templates/viewer_settings.js templates/viewer_settings.js
templates/no_js_library_page.html templates/no_js_library_page.html
templates/no_js_download.html templates/no_js_download.html

View File

@ -359,6 +359,57 @@ R"EXPECTEDRESULT( <link type="text/css" href="/ROOT%23%3F/skin/search_results
} }
} }
// Returns the part of `url` that follows the first occurrence of
// "?cacheid=" (up to the end of the string), or an empty string if `url`
// contains no such marker.
std::string getCacheIdFromUrl(const std::string& url)
{
  const std::string marker("?cacheid=");
  const std::string::size_type markerPos = url.find(marker);
  if ( markerPos == std::string::npos ) {
    return "";
  }
  return url.substr(markerPos + marker.size());
}
// Runs `cmd` via popen() and returns the text that the command wrote to its
// standard output. If the pipe cannot be opened, an empty string is returned.
// The exit status of the command is not inspected.
std::string runExternalCmdAndGetItsOutput(const std::string& cmd)
{
  std::string cmdOutput;
#ifdef _WIN32
#define popen _popen
#define pclose _pclose
#endif
  if (FILE* pPipe = popen(cmd.c_str(), "r"))
  {
    char buf[128];
    while (fgets(buf, static_cast<int>(sizeof(buf)), pPipe)) {
      // fgets() NUL-terminates the chunk it has read, which is usually
      // shorter than the buffer. Append only that chunk; appending the
      // whole buffer would leak the terminator and stale/uninitialized
      // bytes into the result.
      cmdOutput += buf;
    }
    pclose(pPipe);
  }
  return cmdOutput;
}
// Builds a `python3 -c "..."` command line whose script fetches `url` with
// urllib and prints the hex SHA-1 digest of the response body, runs it via
// runExternalCmdAndGetItsOutput() and returns whatever that call returns.
// NOTE: `url` is pasted verbatim into the command line, so it must not
// contain quote characters.
std::string getSha1OfResponseData(const std::string& url)
{
  std::string cmd = "python3 -c \"";
  cmd += "import urllib.request as req; ";
  cmd += "import hashlib; ";
  cmd += "print(hashlib.sha1(req.urlopen('" + url + "').read()).hexdigest())";
  cmd += "\"";
  return runExternalCmdAndGetItsOutput(cmd);
}
// For every STATIC_CONTENT entry of all200Resources(), downloads the resource
// from the test server and checks that the first 8 characters of the SHA-1
// of the response data equal the cacheid found in the resource's URL.
TEST_F(ServerTest, CacheIdsOfStaticResourcesMatchTheSha1HashOfResourceContent)
{
  const std::string serverAddress = "http://localhost:" + std::to_string(SERVER_PORT);
  for ( const Resource& res : all200Resources() ) {
    if ( res.kind != STATIC_CONTENT ) {
      continue;
    }
    const TestContext ctx{ {"url", res.url} };
    const std::string contentSha1 = getSha1OfResponseData(serverAddress + res.url);
    ASSERT_EQ(contentSha1.substr(0, 8), getCacheIdFromUrl(res.url)) << ctx;
  }
}
const char* urls400[] = { const char* urls400[] = {
"/ROOT%23%3F/search", "/ROOT%23%3F/search",
"/ROOT%23%3F/search?content=zimfile", "/ROOT%23%3F/search?content=zimfile",