mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 04:19:10 -04:00
fix(windows): handle invalid wide chars in file names (fixes gh #241)
For some reason, Windows allows invalid UTF-16 characters in file names. Try to handle these gracefully when converting to UTF-8.
This commit is contained in:
parent
e8f084d183
commit
7431bb627c
@ -55,6 +55,7 @@ void utf8_sanitize(std::string& str);
|
||||
void shorten_path_string(std::string& path, char separator, size_t max_len);
|
||||
|
||||
std::filesystem::path canonical_path(std::filesystem::path p);
|
||||
std::string path_to_utf8_string_sanitized(std::filesystem::path const& p);
|
||||
|
||||
bool getenv_is_enabled(char const* var);
|
||||
|
||||
|
@ -121,6 +121,9 @@ class entry : public entry_interface {
|
||||
private:
|
||||
std::u8string u8name() const;
|
||||
|
||||
#ifdef _WIN32
|
||||
std::filesystem::path path_;
|
||||
#endif
|
||||
std::string name_;
|
||||
std::weak_ptr<entry> parent_;
|
||||
file_stat stat_;
|
||||
|
20
src/util.cpp
20
src/util.cpp
@ -26,6 +26,7 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <type_traits>
|
||||
|
||||
#if __has_include(<utf8cpp/utf8.h>)
|
||||
#include <utf8cpp/utf8.h>
|
||||
@ -264,6 +265,25 @@ std::filesystem::path canonical_path(std::filesystem::path p) {
|
||||
return p;
|
||||
}
|
||||
|
||||
std::string path_to_utf8_string_sanitized(std::filesystem::path const& p) {
|
||||
#ifdef _WIN32
|
||||
if constexpr (std::is_same_v<std::filesystem::path::value_type, wchar_t>) {
|
||||
auto const& in = p.native();
|
||||
if (in.empty()) {
|
||||
return {};
|
||||
}
|
||||
int size_needed = ::WideCharToMultiByte(
|
||||
CP_UTF8, 0, in.data(), (int)in.size(), NULL, 0, NULL, NULL);
|
||||
std::string out(size_needed, 0);
|
||||
::WideCharToMultiByte(CP_UTF8, 0, in.data(), (int)in.size(), &out[0],
|
||||
size_needed, NULL, NULL);
|
||||
return out;
|
||||
}
|
||||
#endif
|
||||
|
||||
return u8string_to_string(p.u8string());
|
||||
}
|
||||
|
||||
bool getenv_is_enabled(char const* var) {
|
||||
if (auto val = std::getenv(var)) {
|
||||
if (auto maybeBool = try_to<bool>(val); maybeBool && *maybeBool) {
|
||||
|
@ -58,20 +58,19 @@ bool is_root_path(std::string_view path) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Derive the UTF-8 name stored for an entry from its path.
 *
 * \param path        path of the entry
 * \param has_parent  if true only the final path component is used,
 *                    otherwise the complete path is used
 * \return            UTF-8 name for the entry
 *
 * The conversion goes through path_to_utf8_string_sanitized() rather than
 * path::u8string(): the latter can throw std::system_error when the native
 * name contains invalid wide characters.
 */
std::string entry_name(fs::path const& path, bool has_parent) {
  return path_to_utf8_string_sanitized(has_parent ? path.filename() : path);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
entry::entry(fs::path const& path, std::shared_ptr<entry> parent,
|
||||
file_stat const& st)
|
||||
: name_{entry_name(path, static_cast<bool>(parent))}
|
||||
#ifdef _WIN32
|
||||
: path_{parent ? path.filename() : path}
|
||||
, name_{path_to_utf8_string_sanitized(path_)}
|
||||
#else
|
||||
: name_{path_to_utf8_string_sanitized(parent ? path.filename() : path)}
|
||||
#endif
|
||||
, parent_{std::move(parent)}
|
||||
, stat_{st} {}
|
||||
, stat_{st} {
|
||||
}
|
||||
|
||||
bool entry::has_parent() const {
|
||||
if (parent_.lock()) {
|
||||
@ -88,11 +87,17 @@ void entry::set_name(const std::string& name) { name_ = name; }
|
||||
// Returns the stored entry name (`name_`) converted with string_to_u8string().
std::u8string entry::u8name() const {
  return string_to_u8string(name_);
}
|
||||
|
||||
/**
 * Build the filesystem path of this entry by joining it onto its parents.
 *
 * \return  `parent->fs_path() / self` while a parent is still alive,
 *          otherwise just this entry's own component
 *
 * On Windows the component comes from `path_`, the native path captured at
 * construction, not from the sanitized UTF-8 `name_`. Presumably this keeps
 * entries whose names contain invalid UTF-16 addressable on disk.
 * NOTE(review): `path_` is not updated by set_name(); confirm that is intended.
 */
fs::path entry::fs_path() const {
#ifdef _WIN32
  fs::path self = path_;
#else
  fs::path self = name_;
#endif

  if (auto parent = parent_.lock()) {
    return parent->fs_path() / self;
  }

  return self;
}
|
||||
|
||||
std::string entry::path_as_string() const {
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <system_error>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@ -329,6 +330,7 @@ class scanner_ final : public scanner::impl {
|
||||
os_access const& os_;
|
||||
std::vector<std::unique_ptr<entry_filter>> filters_;
|
||||
std::vector<std::unique_ptr<entry_transformer>> transformers_;
|
||||
std::unordered_set<std::string> invalid_filenames_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -361,6 +363,27 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
||||
file_scanner& fs, bool debug_filter) {
|
||||
try {
|
||||
auto pe = entry_factory_.create(os_, name, parent);
|
||||
|
||||
if constexpr (!std::is_same_v<std::filesystem::path::value_type, char>) {
|
||||
try {
|
||||
auto tmp [[maybe_unused]] = name.filename().u8string();
|
||||
} catch (std::system_error const& e) {
|
||||
LOG_ERROR << fmt::format(
|
||||
"invalid file name in \"{}\", storing as \"{}\": {}",
|
||||
path_to_utf8_string_sanitized(name.parent_path()), pe->name(),
|
||||
e.what());
|
||||
|
||||
prog.errors++;
|
||||
|
||||
if (!invalid_filenames_.emplace(path_to_utf8_string_sanitized(name))
|
||||
.second) {
|
||||
LOG_ERROR << fmt::format(
|
||||
"cannot store \"{}\" as the name already exists", pe->name());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool const exclude =
|
||||
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
|
||||
return f->filter(*pe) == filter_action::remove;
|
||||
@ -451,7 +474,8 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
||||
|
||||
return pe;
|
||||
} catch (const std::system_error& e) {
|
||||
LOG_ERROR << fmt::format("error reading entry (path={}): {}", name.string(),
|
||||
LOG_ERROR << fmt::format("error reading entry (path={}): {}",
|
||||
path_to_utf8_string_sanitized(name),
|
||||
exception_str(e));
|
||||
prog.errors++;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user