// Patch summary: introduces utf8_sanitize() and applies it to path strings
// before they are measured, shortened and shown in progress/status output.
//
//  * include/dwarfs/util.h
//      Declares `void utf8_sanitize(std::string& str);` next to
//      utf8_truncate().
//
//  * src/dwarfs/util.cpp
//      Defines utf8_sanitize(): when utf8::is_valid(str) is false, `str` is
//      overwritten with the result of utf8::replace_invalid(str); otherwise
//      the string is left untouched. The invalid branch is marked
//      [[unlikely]].
//
//  * src/dwarfs/console_writer.cpp (output_context_line)
//      Copies *st.path into a local `path` when st.path is set and sanitizes
//      the copy. The local is then used for the display-width computation,
//      for shorten_path_string(), and for appending to st.status_string;
//      *st.path itself is no longer modified by this code.
//      The path_len / extra_len conditions change from testing `st.path` to
//      testing `!path.empty()`.
//      NOTE(review): the condition guarding the later
//      `st.status_string += path` is outside this hunk -- confirm it agrees
//      with the new `!path.empty()` checks when st.path is set but empty.
//
//  * src/dwarfs/scanner.cpp (status_string)
//      Calls utf8_sanitize(path) immediately before shorten_path_string().
//
//  * test/tool_main_test.cpp
//      - random_file_tree_options gains `bool with_invalid_utf8{false}`.
//      - random_path_component: when the option is set and invalid_dist(rng)
//        (uniform over {0, 1}) yields 0, the component is built with
//        create_random_string(size, 96, 255, rng) instead of the 'A'..'Z'
//        variant.
//      - File contents use create_random_string() only when
//        `size < 1024 * 1024` (and rng() % 2 == 0); otherwise loremipsum().
//      - mkdwarfs_progress_test.basic now passes
//        `.avg_size = 20.0 * 1024 * 1024`, `.dimension = 2`, and, unless
//        _WIN32 is defined, `.with_invalid_utf8 = true`.
//
// NOTE(review): this patch text appears damaged by extraction -- confirm
// against the upstream commit before applying:
//   - the unified diff has been collapsed onto three physical lines, so the
//     hunk line structure (context / '+' / '-' markers, indentation) is lost;
//   - some template argument lists look stripped, e.g. `std::span args`,
//     `static_cast(std::filesystem::path::preferred_separator)`,
//     `std::vector> paths`, `static_cast(size_dist(rng))`.
diff --git a/include/dwarfs/util.h b/include/dwarfs/util.h index 9606960c..54d02d54 100644 --- a/include/dwarfs/util.h +++ b/include/dwarfs/util.h @@ -63,6 +63,7 @@ int call_sys_main_iolayer(std::span args, iolayer const& iol, size_t utf8_display_width(char const* p, size_t len); size_t utf8_display_width(std::string const& str); void utf8_truncate(std::string& str, size_t len); +void utf8_sanitize(std::string& str); void shorten_path_string(std::string& path, char separator, size_t max_len); diff --git a/src/dwarfs/console_writer.cpp b/src/dwarfs/console_writer.cpp index c5085bda..9dd35e7f 100644 --- a/src/dwarfs/console_writer.cpp +++ b/src/dwarfs/console_writer.cpp @@ -91,9 +91,15 @@ void output_context_line(terminal const& term, std::ostream& os, assert(width >= progress_w + speed_w + 1); + std::string path; + if (st.path) { + path = *st.path; + utf8_sanitize(path); + } + size_t status_w = width - (progress_w + speed_w + 1); - auto path_len = st.path ? utf8_display_width(*st.path) : 0; - size_t extra_len = st.path && !st.status_string.empty() ? 2 : 0; + auto path_len = !path.empty() ? utf8_display_width(path) : 0; + size_t extra_len = !path.empty() && !st.status_string.empty() ? 
2 : 0; if (status_w < st.context.size() + st.status_string.size() + path_len + extra_len) { @@ -106,11 +112,10 @@ void output_context_line(terminal const& term, std::ostream& os, if (max_path_len > 0) { shorten_path_string( - *st.path, - static_cast(std::filesystem::path::preferred_separator), + path, static_cast(std::filesystem::path::preferred_separator), max_path_len); - path_len = utf8_display_width(*st.path); + path_len = utf8_display_width(path); } } @@ -128,7 +133,7 @@ void output_context_line(terminal const& term, std::ostream& os, if (!st.status_string.empty()) { st.status_string += ": "; } - st.status_string += *st.path; + st.status_string += path; } std::string progress; diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 46219eee..8a4e629a 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -265,6 +265,7 @@ std::string status_string(progress const& p, size_t width) { label = "writing: "; path = i->any()->path_as_string(); } + utf8_sanitize(path); shorten_path_string( path, static_cast(std::filesystem::path::preferred_separator), width - label.size()); diff --git a/src/dwarfs/util.cpp b/src/dwarfs/util.cpp index 6777fd29..be1aa13f 100644 --- a/src/dwarfs/util.cpp +++ b/src/dwarfs/util.cpp @@ -265,6 +265,12 @@ void utf8_truncate(std::string& str, size_t len) { str.resize(p - str.data()); } +void utf8_sanitize(std::string& str) { + if (!utf8::is_valid(str)) [[unlikely]] { + str = utf8::replace_invalid(str); + } +} + void shorten_path_string(std::string& path, char separator, size_t max_len) { auto len = utf8_display_width(path); diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp index 9b29c5ef..c0e8a812 100644 --- a/test/tool_main_test.cpp +++ b/test/tool_main_test.cpp @@ -154,6 +154,7 @@ struct random_file_tree_options { int dimension{20}; int max_name_len{50}; bool with_errors{false}; + bool with_invalid_utf8{false}; }; class mkdwarfs_tester : public tester_common { @@ -193,10 +194,14 @@ class mkdwarfs_tester : 
public tester_common { std::mt19937_64 rng{42}; std::exponential_distribution<> size_dist{1 / opt.avg_size}; std::uniform_int_distribution<> path_comp_size_dist{0, opt.max_name_len}; + std::uniform_int_distribution<> invalid_dist{0, 1}; std::vector> paths; auto random_path_component = [&] { auto size = path_comp_size_dist(rng); + if (opt.with_invalid_utf8 && invalid_dist(rng) == 0) { + return test::create_random_string(size, 96, 255, rng); + } return test::create_random_string(size, 'A', 'Z', rng); }; @@ -213,7 +218,7 @@ class mkdwarfs_tester : public tester_common { auto size = std::min(max_size, static_cast(size_dist(rng))); std::string data; - if (rng() % 2 == 0) { + if (size < 1024 * 1024 && rng() % 2 == 0) { data = test::create_random_string(size, rng); } else { data = test::loremipsum(size); @@ -1734,7 +1739,14 @@ TEST_P(mkdwarfs_progress_test, basic) { t.iol->set_terminal_fancy(true); t.add_root_dir(); - t.add_random_file_tree(); + t.add_random_file_tree({ + .avg_size = 20.0 * 1024 * 1024, + .dimension = 2, +#ifndef _WIN32 + // Windows can't deal with non-UTF-8 filenames + .with_invalid_utf8 = true, +#endif + }); t.os->add_local_files(audio_data_dir); t.os->add_local_files(fits_data_dir);