mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-03 17:56:12 -04:00
fix: sanitize UTF-8 strings, plus tests (fixes gh #191)
This commit is contained in:
parent
6d5f8dcab5
commit
6f738f8f02
@ -63,6 +63,7 @@ int call_sys_main_iolayer(std::span<std::string> args, iolayer const& iol,
|
||||
size_t utf8_display_width(char const* p, size_t len);
|
||||
size_t utf8_display_width(std::string const& str);
|
||||
void utf8_truncate(std::string& str, size_t len);
|
||||
void utf8_sanitize(std::string& str);
|
||||
|
||||
void shorten_path_string(std::string& path, char separator, size_t max_len);
|
||||
|
||||
|
@ -91,9 +91,15 @@ void output_context_line(terminal const& term, std::ostream& os,
|
||||
|
||||
assert(width >= progress_w + speed_w + 1);
|
||||
|
||||
std::string path;
|
||||
if (st.path) {
|
||||
path = *st.path;
|
||||
utf8_sanitize(path);
|
||||
}
|
||||
|
||||
size_t status_w = width - (progress_w + speed_w + 1);
|
||||
auto path_len = st.path ? utf8_display_width(*st.path) : 0;
|
||||
size_t extra_len = st.path && !st.status_string.empty() ? 2 : 0;
|
||||
auto path_len = !path.empty() ? utf8_display_width(path) : 0;
|
||||
size_t extra_len = !path.empty() && !st.status_string.empty() ? 2 : 0;
|
||||
|
||||
if (status_w <
|
||||
st.context.size() + st.status_string.size() + path_len + extra_len) {
|
||||
@ -106,11 +112,10 @@ void output_context_line(terminal const& term, std::ostream& os,
|
||||
|
||||
if (max_path_len > 0) {
|
||||
shorten_path_string(
|
||||
*st.path,
|
||||
static_cast<char>(std::filesystem::path::preferred_separator),
|
||||
path, static_cast<char>(std::filesystem::path::preferred_separator),
|
||||
max_path_len);
|
||||
|
||||
path_len = utf8_display_width(*st.path);
|
||||
path_len = utf8_display_width(path);
|
||||
}
|
||||
}
|
||||
|
||||
@ -128,7 +133,7 @@ void output_context_line(terminal const& term, std::ostream& os,
|
||||
if (!st.status_string.empty()) {
|
||||
st.status_string += ": ";
|
||||
}
|
||||
st.status_string += *st.path;
|
||||
st.status_string += path;
|
||||
}
|
||||
|
||||
std::string progress;
|
||||
|
@ -265,6 +265,7 @@ std::string status_string(progress const& p, size_t width) {
|
||||
label = "writing: ";
|
||||
path = i->any()->path_as_string();
|
||||
}
|
||||
utf8_sanitize(path);
|
||||
shorten_path_string(
|
||||
path, static_cast<char>(std::filesystem::path::preferred_separator),
|
||||
width - label.size());
|
||||
|
@ -265,6 +265,12 @@ void utf8_truncate(std::string& str, size_t len) {
|
||||
str.resize(p - str.data());
|
||||
}
|
||||
|
||||
void utf8_sanitize(std::string& str) {
|
||||
if (!utf8::is_valid(str)) [[unlikely]] {
|
||||
str = utf8::replace_invalid(str);
|
||||
}
|
||||
}
|
||||
|
||||
void shorten_path_string(std::string& path, char separator, size_t max_len) {
|
||||
auto len = utf8_display_width(path);
|
||||
|
||||
|
@ -154,6 +154,7 @@ struct random_file_tree_options {
|
||||
int dimension{20};
|
||||
int max_name_len{50};
|
||||
bool with_errors{false};
|
||||
bool with_invalid_utf8{false};
|
||||
};
|
||||
|
||||
class mkdwarfs_tester : public tester_common {
|
||||
@ -193,10 +194,14 @@ class mkdwarfs_tester : public tester_common {
|
||||
std::mt19937_64 rng{42};
|
||||
std::exponential_distribution<> size_dist{1 / opt.avg_size};
|
||||
std::uniform_int_distribution<> path_comp_size_dist{0, opt.max_name_len};
|
||||
std::uniform_int_distribution<> invalid_dist{0, 1};
|
||||
std::vector<std::pair<fs::path, std::string>> paths;
|
||||
|
||||
auto random_path_component = [&] {
|
||||
auto size = path_comp_size_dist(rng);
|
||||
if (opt.with_invalid_utf8 && invalid_dist(rng) == 0) {
|
||||
return test::create_random_string(size, 96, 255, rng);
|
||||
}
|
||||
return test::create_random_string(size, 'A', 'Z', rng);
|
||||
};
|
||||
|
||||
@ -213,7 +218,7 @@ class mkdwarfs_tester : public tester_common {
|
||||
auto size = std::min(max_size, static_cast<size_t>(size_dist(rng)));
|
||||
std::string data;
|
||||
|
||||
if (rng() % 2 == 0) {
|
||||
if (size < 1024 * 1024 && rng() % 2 == 0) {
|
||||
data = test::create_random_string(size, rng);
|
||||
} else {
|
||||
data = test::loremipsum(size);
|
||||
@ -1734,7 +1739,14 @@ TEST_P(mkdwarfs_progress_test, basic) {
|
||||
t.iol->set_terminal_fancy(true);
|
||||
|
||||
t.add_root_dir();
|
||||
t.add_random_file_tree();
|
||||
t.add_random_file_tree({
|
||||
.avg_size = 20.0 * 1024 * 1024,
|
||||
.dimension = 2,
|
||||
#ifndef _WIN32
|
||||
// Windows can't deal with non-UTF-8 filenames
|
||||
.with_invalid_utf8 = true,
|
||||
#endif
|
||||
});
|
||||
t.os->add_local_files(audio_data_dir);
|
||||
t.os->add_local_files(fits_data_dir);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user