mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 20:41:04 -04:00
fix(windows): handle invalid wide chars in file names (fixes gh #241)
For some reason, Windows allows invalid UTF-16 sequences (such as unpaired surrogates) in file names. Try to handle these gracefully when converting the names to UTF-8.
This commit is contained in:
parent
e8f084d183
commit
7431bb627c
@ -55,6 +55,7 @@ void utf8_sanitize(std::string& str);
|
|||||||
void shorten_path_string(std::string& path, char separator, size_t max_len);
|
void shorten_path_string(std::string& path, char separator, size_t max_len);
|
||||||
|
|
||||||
std::filesystem::path canonical_path(std::filesystem::path p);
|
std::filesystem::path canonical_path(std::filesystem::path p);
|
||||||
|
std::string path_to_utf8_string_sanitized(std::filesystem::path const& p);
|
||||||
|
|
||||||
bool getenv_is_enabled(char const* var);
|
bool getenv_is_enabled(char const* var);
|
||||||
|
|
||||||
|
@ -121,6 +121,9 @@ class entry : public entry_interface {
|
|||||||
private:
|
private:
|
||||||
std::u8string u8name() const;
|
std::u8string u8name() const;
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
std::filesystem::path path_;
|
||||||
|
#endif
|
||||||
std::string name_;
|
std::string name_;
|
||||||
std::weak_ptr<entry> parent_;
|
std::weak_ptr<entry> parent_;
|
||||||
file_stat stat_;
|
file_stat stat_;
|
||||||
|
20
src/util.cpp
20
src/util.cpp
@ -26,6 +26,7 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
#if __has_include(<utf8cpp/utf8.h>)
|
#if __has_include(<utf8cpp/utf8.h>)
|
||||||
#include <utf8cpp/utf8.h>
|
#include <utf8cpp/utf8.h>
|
||||||
@ -264,6 +265,25 @@ std::filesystem::path canonical_path(std::filesystem::path p) {
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string path_to_utf8_string_sanitized(std::filesystem::path const& p) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
if constexpr (std::is_same_v<std::filesystem::path::value_type, wchar_t>) {
|
||||||
|
auto const& in = p.native();
|
||||||
|
if (in.empty()) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
int size_needed = ::WideCharToMultiByte(
|
||||||
|
CP_UTF8, 0, in.data(), (int)in.size(), NULL, 0, NULL, NULL);
|
||||||
|
std::string out(size_needed, 0);
|
||||||
|
::WideCharToMultiByte(CP_UTF8, 0, in.data(), (int)in.size(), &out[0],
|
||||||
|
size_needed, NULL, NULL);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return u8string_to_string(p.u8string());
|
||||||
|
}
|
||||||
|
|
||||||
bool getenv_is_enabled(char const* var) {
|
bool getenv_is_enabled(char const* var) {
|
||||||
if (auto val = std::getenv(var)) {
|
if (auto val = std::getenv(var)) {
|
||||||
if (auto maybeBool = try_to<bool>(val); maybeBool && *maybeBool) {
|
if (auto maybeBool = try_to<bool>(val); maybeBool && *maybeBool) {
|
||||||
|
@ -58,20 +58,19 @@ bool is_root_path(std::string_view path) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Derive the name stored for an entry from its path: an entry that has a
// parent is named by its final path component only, while a parentless entry
// keeps the complete path as given. The chosen path is converted with
// u8string() and passed through u8string_to_string().
std::string entry_name(fs::path const& path, bool has_parent) {
  fs::path const source = has_parent ? path.filename() : path;
  return u8string_to_string(source.u8string());
}
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
entry::entry(fs::path const& path, std::shared_ptr<entry> parent,
|
entry::entry(fs::path const& path, std::shared_ptr<entry> parent,
|
||||||
file_stat const& st)
|
file_stat const& st)
|
||||||
: name_{entry_name(path, static_cast<bool>(parent))}
|
#ifdef _WIN32
|
||||||
|
: path_{parent ? path.filename() : path}
|
||||||
|
, name_{path_to_utf8_string_sanitized(path_)}
|
||||||
|
#else
|
||||||
|
: name_{path_to_utf8_string_sanitized(parent ? path.filename() : path)}
|
||||||
|
#endif
|
||||||
, parent_{std::move(parent)}
|
, parent_{std::move(parent)}
|
||||||
, stat_{st} {}
|
, stat_{st} {
|
||||||
|
}
|
||||||
|
|
||||||
bool entry::has_parent() const {
|
bool entry::has_parent() const {
|
||||||
if (parent_.lock()) {
|
if (parent_.lock()) {
|
||||||
@ -88,11 +87,17 @@ void entry::set_name(const std::string& name) { name_ = name; }
|
|||||||
std::u8string entry::u8name() const { return string_to_u8string(name_); }
|
std::u8string entry::u8name() const { return string_to_u8string(name_); }
|
||||||
|
|
||||||
fs::path entry::fs_path() const {
|
fs::path entry::fs_path() const {
|
||||||
|
#ifdef _WIN32
|
||||||
|
fs::path self = path_;
|
||||||
|
#else
|
||||||
|
fs::path self = name_;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (auto parent = parent_.lock()) {
|
if (auto parent = parent_.lock()) {
|
||||||
return parent->fs_path() / u8name();
|
return parent->fs_path() / self;
|
||||||
}
|
}
|
||||||
|
|
||||||
return fs::path(u8name());
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string entry::path_as_string() const {
|
std::string entry::path_as_string() const {
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <system_error>
|
#include <system_error>
|
||||||
|
#include <unordered_set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -329,6 +330,7 @@ class scanner_ final : public scanner::impl {
|
|||||||
os_access const& os_;
|
os_access const& os_;
|
||||||
std::vector<std::unique_ptr<entry_filter>> filters_;
|
std::vector<std::unique_ptr<entry_filter>> filters_;
|
||||||
std::vector<std::unique_ptr<entry_transformer>> transformers_;
|
std::vector<std::unique_ptr<entry_transformer>> transformers_;
|
||||||
|
std::unordered_set<std::string> invalid_filenames_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
@ -361,6 +363,27 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
file_scanner& fs, bool debug_filter) {
|
file_scanner& fs, bool debug_filter) {
|
||||||
try {
|
try {
|
||||||
auto pe = entry_factory_.create(os_, name, parent);
|
auto pe = entry_factory_.create(os_, name, parent);
|
||||||
|
|
||||||
|
if constexpr (!std::is_same_v<std::filesystem::path::value_type, char>) {
|
||||||
|
try {
|
||||||
|
auto tmp [[maybe_unused]] = name.filename().u8string();
|
||||||
|
} catch (std::system_error const& e) {
|
||||||
|
LOG_ERROR << fmt::format(
|
||||||
|
"invalid file name in \"{}\", storing as \"{}\": {}",
|
||||||
|
path_to_utf8_string_sanitized(name.parent_path()), pe->name(),
|
||||||
|
e.what());
|
||||||
|
|
||||||
|
prog.errors++;
|
||||||
|
|
||||||
|
if (!invalid_filenames_.emplace(path_to_utf8_string_sanitized(name))
|
||||||
|
.second) {
|
||||||
|
LOG_ERROR << fmt::format(
|
||||||
|
"cannot store \"{}\" as the name already exists", pe->name());
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool const exclude =
|
bool const exclude =
|
||||||
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
|
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
|
||||||
return f->filter(*pe) == filter_action::remove;
|
return f->filter(*pe) == filter_action::remove;
|
||||||
@ -451,7 +474,8 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
|
|
||||||
return pe;
|
return pe;
|
||||||
} catch (const std::system_error& e) {
|
} catch (const std::system_error& e) {
|
||||||
LOG_ERROR << fmt::format("error reading entry (path={}): {}", name.string(),
|
LOG_ERROR << fmt::format("error reading entry (path={}): {}",
|
||||||
|
path_to_utf8_string_sanitized(name),
|
||||||
exception_str(e));
|
exception_str(e));
|
||||||
prog.errors++;
|
prog.errors++;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user