mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
fix: sanitize UTF-8 strings, plus tests (fixes gh #191)
This commit is contained in:
parent
6d5f8dcab5
commit
6f738f8f02
@ -63,6 +63,7 @@ int call_sys_main_iolayer(std::span<std::string> args, iolayer const& iol,
|
|||||||
size_t utf8_display_width(char const* p, size_t len);
|
size_t utf8_display_width(char const* p, size_t len);
|
||||||
size_t utf8_display_width(std::string const& str);
|
size_t utf8_display_width(std::string const& str);
|
||||||
void utf8_truncate(std::string& str, size_t len);
|
void utf8_truncate(std::string& str, size_t len);
|
||||||
|
void utf8_sanitize(std::string& str);
|
||||||
|
|
||||||
void shorten_path_string(std::string& path, char separator, size_t max_len);
|
void shorten_path_string(std::string& path, char separator, size_t max_len);
|
||||||
|
|
||||||
|
@ -91,9 +91,15 @@ void output_context_line(terminal const& term, std::ostream& os,
|
|||||||
|
|
||||||
assert(width >= progress_w + speed_w + 1);
|
assert(width >= progress_w + speed_w + 1);
|
||||||
|
|
||||||
|
std::string path;
|
||||||
|
if (st.path) {
|
||||||
|
path = *st.path;
|
||||||
|
utf8_sanitize(path);
|
||||||
|
}
|
||||||
|
|
||||||
size_t status_w = width - (progress_w + speed_w + 1);
|
size_t status_w = width - (progress_w + speed_w + 1);
|
||||||
auto path_len = st.path ? utf8_display_width(*st.path) : 0;
|
auto path_len = !path.empty() ? utf8_display_width(path) : 0;
|
||||||
size_t extra_len = st.path && !st.status_string.empty() ? 2 : 0;
|
size_t extra_len = !path.empty() && !st.status_string.empty() ? 2 : 0;
|
||||||
|
|
||||||
if (status_w <
|
if (status_w <
|
||||||
st.context.size() + st.status_string.size() + path_len + extra_len) {
|
st.context.size() + st.status_string.size() + path_len + extra_len) {
|
||||||
@ -106,11 +112,10 @@ void output_context_line(terminal const& term, std::ostream& os,
|
|||||||
|
|
||||||
if (max_path_len > 0) {
|
if (max_path_len > 0) {
|
||||||
shorten_path_string(
|
shorten_path_string(
|
||||||
*st.path,
|
path, static_cast<char>(std::filesystem::path::preferred_separator),
|
||||||
static_cast<char>(std::filesystem::path::preferred_separator),
|
|
||||||
max_path_len);
|
max_path_len);
|
||||||
|
|
||||||
path_len = utf8_display_width(*st.path);
|
path_len = utf8_display_width(path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,7 +133,7 @@ void output_context_line(terminal const& term, std::ostream& os,
|
|||||||
if (!st.status_string.empty()) {
|
if (!st.status_string.empty()) {
|
||||||
st.status_string += ": ";
|
st.status_string += ": ";
|
||||||
}
|
}
|
||||||
st.status_string += *st.path;
|
st.status_string += path;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string progress;
|
std::string progress;
|
||||||
|
@ -265,6 +265,7 @@ std::string status_string(progress const& p, size_t width) {
|
|||||||
label = "writing: ";
|
label = "writing: ";
|
||||||
path = i->any()->path_as_string();
|
path = i->any()->path_as_string();
|
||||||
}
|
}
|
||||||
|
utf8_sanitize(path);
|
||||||
shorten_path_string(
|
shorten_path_string(
|
||||||
path, static_cast<char>(std::filesystem::path::preferred_separator),
|
path, static_cast<char>(std::filesystem::path::preferred_separator),
|
||||||
width - label.size());
|
width - label.size());
|
||||||
|
@ -265,6 +265,12 @@ void utf8_truncate(std::string& str, size_t len) {
|
|||||||
str.resize(p - str.data());
|
str.resize(p - str.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sanitizes `str` in place so that it passes utf8::is_valid().
//
// If utf8::is_valid() accepts the string, it is left untouched. Otherwise
// the string is overwritten with the result of utf8::replace_invalid().
//
// NOTE(review): utf8:: is presumably the utfcpp library, whose
// replace_invalid() substitutes a replacement marker for each invalid
// sequence -- confirm against the header this file includes.
void utf8_sanitize(std::string& str) {
|
||||||
|
if (!utf8::is_valid(str)) [[unlikely]] {  // validate first; the copy below is only made for invalid input
|
||||||
|
str = utf8::replace_invalid(str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void shorten_path_string(std::string& path, char separator, size_t max_len) {
|
void shorten_path_string(std::string& path, char separator, size_t max_len) {
|
||||||
auto len = utf8_display_width(path);
|
auto len = utf8_display_width(path);
|
||||||
|
|
||||||
|
@ -154,6 +154,7 @@ struct random_file_tree_options {
|
|||||||
int dimension{20};
|
int dimension{20};
|
||||||
int max_name_len{50};
|
int max_name_len{50};
|
||||||
bool with_errors{false};
|
bool with_errors{false};
|
||||||
|
bool with_invalid_utf8{false};
|
||||||
};
|
};
|
||||||
|
|
||||||
class mkdwarfs_tester : public tester_common {
|
class mkdwarfs_tester : public tester_common {
|
||||||
@ -193,10 +194,14 @@ class mkdwarfs_tester : public tester_common {
|
|||||||
std::mt19937_64 rng{42};
|
std::mt19937_64 rng{42};
|
||||||
std::exponential_distribution<> size_dist{1 / opt.avg_size};
|
std::exponential_distribution<> size_dist{1 / opt.avg_size};
|
||||||
std::uniform_int_distribution<> path_comp_size_dist{0, opt.max_name_len};
|
std::uniform_int_distribution<> path_comp_size_dist{0, opt.max_name_len};
|
||||||
|
std::uniform_int_distribution<> invalid_dist{0, 1};
|
||||||
std::vector<std::pair<fs::path, std::string>> paths;
|
std::vector<std::pair<fs::path, std::string>> paths;
|
||||||
|
|
||||||
auto random_path_component = [&] {
|
auto random_path_component = [&] {
|
||||||
auto size = path_comp_size_dist(rng);
|
auto size = path_comp_size_dist(rng);
|
||||||
|
if (opt.with_invalid_utf8 && invalid_dist(rng) == 0) {
|
||||||
|
return test::create_random_string(size, 96, 255, rng);
|
||||||
|
}
|
||||||
return test::create_random_string(size, 'A', 'Z', rng);
|
return test::create_random_string(size, 'A', 'Z', rng);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -213,7 +218,7 @@ class mkdwarfs_tester : public tester_common {
|
|||||||
auto size = std::min(max_size, static_cast<size_t>(size_dist(rng)));
|
auto size = std::min(max_size, static_cast<size_t>(size_dist(rng)));
|
||||||
std::string data;
|
std::string data;
|
||||||
|
|
||||||
if (rng() % 2 == 0) {
|
if (size < 1024 * 1024 && rng() % 2 == 0) {
|
||||||
data = test::create_random_string(size, rng);
|
data = test::create_random_string(size, rng);
|
||||||
} else {
|
} else {
|
||||||
data = test::loremipsum(size);
|
data = test::loremipsum(size);
|
||||||
@ -1734,7 +1739,14 @@ TEST_P(mkdwarfs_progress_test, basic) {
|
|||||||
t.iol->set_terminal_fancy(true);
|
t.iol->set_terminal_fancy(true);
|
||||||
|
|
||||||
t.add_root_dir();
|
t.add_root_dir();
|
||||||
t.add_random_file_tree();
|
t.add_random_file_tree({
|
||||||
|
.avg_size = 20.0 * 1024 * 1024,
|
||||||
|
.dimension = 2,
|
||||||
|
#ifndef _WIN32
|
||||||
|
// Windows can't deal with non-UTF-8 filenames
|
||||||
|
.with_invalid_utf8 = true,
|
||||||
|
#endif
|
||||||
|
});
|
||||||
t.os->add_local_files(audio_data_dir);
|
t.os->add_local_files(audio_data_dir);
|
||||||
t.os->add_local_files(fits_data_dir);
|
t.os->add_local_files(fits_data_dir);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user