fix(windows): handle invalid wide chars in file names (fixes gh #241)

For some reason, Windows allows invalid UTF-16 characters in file names.
Try to handle these gracefully when converting to UTF-8.
This commit is contained in:
Marcus Holland-Moritz 2024-10-10 19:45:24 +02:00
parent e8f084d183
commit 7431bb627c
5 changed files with 65 additions and 12 deletions

View File

@ -55,6 +55,7 @@ void utf8_sanitize(std::string& str);
void shorten_path_string(std::string& path, char separator, size_t max_len);
std::filesystem::path canonical_path(std::filesystem::path p);
std::string path_to_utf8_string_sanitized(std::filesystem::path const& p);
bool getenv_is_enabled(char const* var);

View File

@ -121,6 +121,9 @@ class entry : public entry_interface {
private:
std::u8string u8name() const;
#ifdef _WIN32
std::filesystem::path path_;
#endif
std::string name_;
std::weak_ptr<entry> parent_;
file_stat stat_;

View File

@ -26,6 +26,7 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <type_traits>
#if __has_include(<utf8cpp/utf8.h>)
#include <utf8cpp/utf8.h>
@ -264,6 +265,25 @@ std::filesystem::path canonical_path(std::filesystem::path p) {
return p;
}
/**
 * Convert a filesystem path to a UTF-8 encoded std::string.
 *
 * On Windows builds where the native path character type is wchar_t, the
 * native wide string is converted with WideCharToMultiByte(CP_UTF8) instead
 * of going through std::filesystem::path::u8string(). On every other
 * configuration the result is u8string_to_string(p.u8string()).
 *
 * \param p  The path to convert.
 * \return   The UTF-8 representation of `p`; an empty string if `p` is empty
 *           or if the Windows conversion fails.
 */
std::string path_to_utf8_string_sanitized(std::filesystem::path const& p) {
#ifdef _WIN32
  if constexpr (std::is_same_v<std::filesystem::path::value_type, wchar_t>) {
    auto const& in = p.native();
    if (in.empty()) {
      return {};
    }

    auto const in_len = static_cast<int>(in.size());

    // First pass: no output buffer, only query the number of bytes required.
    int const size_needed = ::WideCharToMultiByte(
        CP_UTF8, 0, in.data(), in_len, nullptr, 0, nullptr, nullptr);
    if (size_needed <= 0) {
      // Conversion not possible; do not construct a buffer from a bogus size.
      return {};
    }

    std::string out(static_cast<std::string::size_type>(size_needed), '\0');

    // Second pass: perform the actual conversion into the sized buffer.
    int const written =
        ::WideCharToMultiByte(CP_UTF8, 0, in.data(), in_len, out.data(),
                              size_needed, nullptr, nullptr);

    // Keep only what was really written, so a failed or short conversion
    // never leaks the NUL padding of the pre-sized buffer into the result.
    out.resize(written > 0 ? static_cast<std::string::size_type>(written) : 0);
    return out;
  }
#endif
  return u8string_to_string(p.u8string());
}
bool getenv_is_enabled(char const* var) {
if (auto val = std::getenv(var)) {
if (auto maybeBool = try_to<bool>(val); maybeBool && *maybeBool) {

View File

@ -58,20 +58,19 @@ bool is_root_path(std::string_view path) {
#endif
}
// Returns the name to store for an entry as a std::string: only the last
// path component when the entry has a parent, the complete path otherwise.
std::string entry_name(fs::path const& path, bool has_parent) {
  fs::path const selected = has_parent ? path.filename() : path;
  return u8string_to_string(selected.u8string());
}
} // namespace
// Constructs an entry from a filesystem path, an optional parent entry and
// the stat information for the path.
//
// When a parent is given, only the last component of `path` is kept;
// otherwise the whole path is kept. The stored name_ is a std::string
// obtained via path_to_utf8_string_sanitized(); on Windows the selected
// native path is stored in path_ as well.
//
// NOTE(review): this listing shows two alternative initializer lists and two
// bodies back to back (the entry_name()-based one and the #ifdef-based one);
// presumably only the #ifdef-based variant is meant to remain - confirm.
entry::entry(fs::path const& path, std::shared_ptr<entry> parent,
file_stat const& st)
: name_{entry_name(path, static_cast<bool>(parent))}
#ifdef _WIN32
// `parent` is only tested here; it is moved from further down.
: path_{parent ? path.filename() : path}
// Reads path_, which is initialized by the line above.
, name_{path_to_utf8_string_sanitized(path_)}
#else
: name_{path_to_utf8_string_sanitized(parent ? path.filename() : path)}
#endif
, parent_{std::move(parent)}
, stat_{st} {}
, stat_{st} {
}
bool entry::has_parent() const {
if (parent_.lock()) {
@ -88,11 +87,17 @@ void entry::set_name(const std::string& name) { name_ = name; }
// Returns the stored entry name converted to a std::u8string.
std::u8string entry::u8name() const {
  auto const& stored_name = name_;
  return string_to_u8string(stored_name);
}
// Returns the filesystem path of this entry.
//
// The entry's own component (`self`) is the stored native path_ on Windows
// and a path built from name_ everywhere else. If the parent entry is still
// alive, the component is appended to the parent's fs_path() (recursively);
// otherwise the component alone is returned.
//
// NOTE(review): both the u8name()-based and the `self`-based return
// statements appear in this listing; presumably only the `self`-based ones
// are meant to remain - confirm.
fs::path entry::fs_path() const {
#ifdef _WIN32
fs::path self = path_;
#else
fs::path self = name_;
#endif
// parent_ is a weak_ptr; lock() yields an empty pointer if the parent is gone.
if (auto parent = parent_.lock()) {
return parent->fs_path() / u8name();
return parent->fs_path() / self;
}
return fs::path(u8name());
return self;
}
std::string entry::path_as_string() const {

View File

@ -31,6 +31,7 @@
#include <stdexcept>
#include <string>
#include <system_error>
#include <unordered_set>
#include <utility>
#include <vector>
@ -329,6 +330,7 @@ class scanner_ final : public scanner::impl {
os_access const& os_;
std::vector<std::unique_ptr<entry_filter>> filters_;
std::vector<std::unique_ptr<entry_transformer>> transformers_;
std::unordered_set<std::string> invalid_filenames_;
};
template <typename LoggerPolicy>
@ -361,6 +363,27 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
file_scanner& fs, bool debug_filter) {
try {
auto pe = entry_factory_.create(os_, name, parent);
if constexpr (!std::is_same_v<std::filesystem::path::value_type, char>) {
try {
auto tmp [[maybe_unused]] = name.filename().u8string();
} catch (std::system_error const& e) {
LOG_ERROR << fmt::format(
"invalid file name in \"{}\", storing as \"{}\": {}",
path_to_utf8_string_sanitized(name.parent_path()), pe->name(),
e.what());
prog.errors++;
if (!invalid_filenames_.emplace(path_to_utf8_string_sanitized(name))
.second) {
LOG_ERROR << fmt::format(
"cannot store \"{}\" as the name already exists", pe->name());
return nullptr;
}
}
}
bool const exclude =
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
return f->filter(*pe) == filter_action::remove;
@ -451,7 +474,8 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
return pe;
} catch (const std::system_error& e) {
LOG_ERROR << fmt::format("error reading entry (path={}): {}", name.string(),
LOG_ERROR << fmt::format("error reading entry (path={}): {}",
path_to_utf8_string_sanitized(name),
exception_str(e));
prog.errors++;
}