mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-03 17:56:12 -04:00
feat: support case-insensitive lookups (fixes gh #232)
This commit is contained in:
parent
f01f4a4ed2
commit
b349e584e0
38
.maintainer-scripts/unicode-case-fold.pl
Executable file
38
.maintainer-scripts/unicode-case-fold.pl
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/perl -w
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
# Input data: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
|
||||||
|
|
||||||
|
my %map;
|
||||||
|
|
||||||
|
while (<>) {
|
||||||
|
chomp;
|
||||||
|
next if /^(#|\s*$)/;
|
||||||
|
my($char, $status, $fold, $comment) = split /\s*;\s*/;
|
||||||
|
if ($status =~ /^[CS]$/) {
|
||||||
|
$comment =~ s/^#\s*//;
|
||||||
|
# print " case 0x$char: return 0x$fold; // [$status] $comment\n";
|
||||||
|
$map{hex($char)} = hex($fold);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
my @valid_code_points = (0..0xD7FF, 0xE000..0x10FFFF);
|
||||||
|
|
||||||
|
sub cp_to_str {
|
||||||
|
my $cp = shift;
|
||||||
|
my $fmt = $cp < 0x10000 ? "\\u%04X" : "\\U%08X";
|
||||||
|
return sprintf $fmt, $cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (@valid_code_points) {
|
||||||
|
my @cps = splice @valid_code_points, 0, 256;
|
||||||
|
my $orig;
|
||||||
|
my $folded;
|
||||||
|
for my $cp (@cps) {
|
||||||
|
my $fold = $map{$cp} // $cp;
|
||||||
|
$orig .= cp_to_str($cp);
|
||||||
|
$folded .= cp_to_str($fold);
|
||||||
|
}
|
||||||
|
print " {u8\"$orig\"sv, u8\"$folded\"sv},\n" if $orig ne $folded;
|
||||||
|
}
|
@ -450,6 +450,7 @@ if(WITH_TESTS)
|
|||||||
pcmaudio_categorizer_test
|
pcmaudio_categorizer_test
|
||||||
speedometer_test
|
speedometer_test
|
||||||
terminal_test
|
terminal_test
|
||||||
|
unicode_test
|
||||||
utils_test
|
utils_test
|
||||||
file_utils_test
|
file_utils_test
|
||||||
worker_group_test
|
worker_group_test
|
||||||
|
@ -49,6 +49,7 @@ add_library(
|
|||||||
src/internal/fs_section.cpp
|
src/internal/fs_section.cpp
|
||||||
src/internal/glob_to_regex.cpp
|
src/internal/glob_to_regex.cpp
|
||||||
src/internal/string_table.cpp
|
src/internal/string_table.cpp
|
||||||
|
src/internal/unicode_case_folding.cpp
|
||||||
src/internal/wcwidth.c
|
src/internal/wcwidth.c
|
||||||
src/internal/worker_group.cpp
|
src/internal/worker_group.cpp
|
||||||
|
|
||||||
|
@ -104,6 +104,17 @@ options:
|
|||||||
overlays and want the file system to reflect its read-only
|
overlays and want the file system to reflect its read-only
|
||||||
state, you can set this option.
|
state, you can set this option.
|
||||||
|
|
||||||
|
- `-o case_insensitive`:
|
||||||
|
Perform case-insensitive lookups in the mounted file system,
|
||||||
|
i.e. an entry originally named `ReadMe.txt` can be accessed as
|
||||||
|
`readme.txt`, `README.TXT`, or `rEaDmE.tXt`. This works across
|
||||||
|
all platforms. When mounting a file system with many files, this
|
||||||
|
may be slightly slower and consume slightly more memory as case-
|
||||||
|
insensitive lookup requires an additional mapping table that is
|
||||||
|
built on-demand. Note that this is not supported if the file
|
||||||
|
system contains directories with entries that only differ in
|
||||||
|
case.
|
||||||
|
|
||||||
- `-o (no_)cache_image`:
|
- `-o (no_)cache_image`:
|
||||||
By default, `dwarfs` tries to ensure that the compressed file
|
By default, `dwarfs` tries to ensure that the compressed file
|
||||||
system image will not be cached by the kernel (i.e. the default
|
system image will not be cached by the kernel (i.e. the default
|
||||||
|
32
include/dwarfs/internal/unicode_case_folding.h
Normal file
32
include/dwarfs/internal/unicode_case_folding.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
namespace dwarfs::internal {
|
||||||
|
|
||||||
|
std::string utf8_case_fold(std::string_view in);
|
||||||
|
std::string utf8_case_fold_unchecked(std::string_view in);
|
||||||
|
|
||||||
|
} // namespace dwarfs::internal
|
@ -32,6 +32,7 @@ struct metadata_options {
|
|||||||
bool enable_nlink{false};
|
bool enable_nlink{false};
|
||||||
bool readonly{false};
|
bool readonly{false};
|
||||||
bool check_consistency{false};
|
bool check_consistency{false};
|
||||||
|
bool case_insensitive_lookup{false};
|
||||||
size_t block_size{512};
|
size_t block_size{512};
|
||||||
std::optional<file_stat::uid_type> fs_uid{};
|
std::optional<file_stat::uid_type> fs_uid{};
|
||||||
std::optional<file_stat::gid_type> fs_gid{};
|
std::optional<file_stat::gid_type> fs_gid{};
|
||||||
|
3027
src/internal/unicode_case_folding.cpp
Normal file
3027
src/internal/unicode_case_folding.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -32,19 +32,28 @@
|
|||||||
|
|
||||||
#include <boost/algorithm/string.hpp>
|
#include <boost/algorithm/string.hpp>
|
||||||
|
|
||||||
|
#include <boost/sort/flat_stable_sort/flat_stable_sort.hpp>
|
||||||
|
|
||||||
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
|
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
|
||||||
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
|
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
|
||||||
#include <thrift/lib/cpp2/protocol/Serializer.h>
|
#include <thrift/lib/cpp2/protocol/Serializer.h>
|
||||||
|
|
||||||
#include <fmt/chrono.h>
|
#include <fmt/chrono.h>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
#if FMT_VERSION >= 110000
|
||||||
|
#include <fmt/ranges.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <folly/container/F14Set.h>
|
#include <folly/container/F14Set.h>
|
||||||
#include <folly/portability/Stdlib.h>
|
#include <folly/portability/Stdlib.h>
|
||||||
#include <folly/portability/Unistd.h>
|
#include <folly/portability/Unistd.h>
|
||||||
|
#include <folly/small_vector.h>
|
||||||
#include <folly/stats/Histogram.h>
|
#include <folly/stats/Histogram.h>
|
||||||
|
|
||||||
|
#include <parallel_hashmap/phmap.h>
|
||||||
|
|
||||||
#include <range/v3/view/enumerate.hpp>
|
#include <range/v3/view/enumerate.hpp>
|
||||||
|
#include <range/v3/view/transform.hpp>
|
||||||
|
|
||||||
#include <dwarfs/error.h>
|
#include <dwarfs/error.h>
|
||||||
#include <dwarfs/file_stat.h>
|
#include <dwarfs/file_stat.h>
|
||||||
@ -60,6 +69,7 @@
|
|||||||
#include <dwarfs/internal/features.h>
|
#include <dwarfs/internal/features.h>
|
||||||
#include <dwarfs/internal/packed_int_vector.h>
|
#include <dwarfs/internal/packed_int_vector.h>
|
||||||
#include <dwarfs/internal/string_table.h>
|
#include <dwarfs/internal/string_table.h>
|
||||||
|
#include <dwarfs/internal/unicode_case_folding.h>
|
||||||
#include <dwarfs/reader/internal/metadata_v2.h>
|
#include <dwarfs/reader/internal/metadata_v2.h>
|
||||||
|
|
||||||
#include <dwarfs/gen-cpp2/metadata_layouts.h>
|
#include <dwarfs/gen-cpp2/metadata_layouts.h>
|
||||||
@ -415,7 +425,7 @@ class metadata_ final : public metadata_v2::impl {
|
|||||||
, symlinks_(meta_.compact_symlinks()
|
, symlinks_(meta_.compact_symlinks()
|
||||||
? string_table(lgr, "symlinks", *meta_.compact_symlinks())
|
? string_table(lgr, "symlinks", *meta_.compact_symlinks())
|
||||||
: string_table(meta_.symlinks()))
|
: string_table(meta_.symlinks()))
|
||||||
// clang-format off
|
, dir_icase_cache_{build_dir_icase_cache()} // clang-format off
|
||||||
PERFMON_CLS_PROXY_INIT(perfmon, "metadata_v2")
|
PERFMON_CLS_PROXY_INIT(perfmon, "metadata_v2")
|
||||||
PERFMON_CLS_TIMER_INIT(find)
|
PERFMON_CLS_TIMER_INIT(find)
|
||||||
PERFMON_CLS_TIMER_INIT(getattr)
|
PERFMON_CLS_TIMER_INIT(getattr)
|
||||||
@ -661,6 +671,10 @@ class metadata_ final : public metadata_v2::impl {
|
|||||||
nlohmann::json as_json(dir_entry_view entry) const;
|
nlohmann::json as_json(dir_entry_view entry) const;
|
||||||
nlohmann::json as_json(directory_view dir, dir_entry_view entry) const;
|
nlohmann::json as_json(directory_view dir, dir_entry_view entry) const;
|
||||||
|
|
||||||
|
std::optional<dir_entry_view>
|
||||||
|
find_impl(directory_view dir, auto const& range, auto const& name,
|
||||||
|
auto const& index_map, auto const& entry_name_transform) const;
|
||||||
|
|
||||||
std::optional<dir_entry_view>
|
std::optional<dir_entry_view>
|
||||||
find(directory_view dir, std::string_view name) const;
|
find(directory_view dir, std::string_view name) const;
|
||||||
|
|
||||||
@ -920,6 +934,79 @@ class metadata_ final : public metadata_v2::impl {
|
|||||||
return packed_nlinks;
|
return packed_nlinks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<packed_int_vector<uint32_t>> build_dir_icase_cache() const {
|
||||||
|
std::vector<packed_int_vector<uint32_t>> cache;
|
||||||
|
|
||||||
|
if (options_.case_insensitive_lookup) {
|
||||||
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
size_t num_cached_dirs = 0;
|
||||||
|
size_t num_cached_files = 0;
|
||||||
|
size_t total_cache_size = 0;
|
||||||
|
|
||||||
|
cache.resize(meta_.directories().size());
|
||||||
|
|
||||||
|
for (uint32_t inode = 0; inode < meta_.directories().size() - 1;
|
||||||
|
++inode) {
|
||||||
|
directory_view dir{inode, global_};
|
||||||
|
auto range = dir.entry_range();
|
||||||
|
|
||||||
|
// Cache the folded names of the directory entries; this significantly
|
||||||
|
// speeds up the sorting code.
|
||||||
|
std::vector<std::string> names(range.size());
|
||||||
|
std::transform(range.begin(), range.end(), names.begin(), [&](auto ix) {
|
||||||
|
return utf8_case_fold_unchecked(
|
||||||
|
dir_entry_view_impl::name(ix, global_));
|
||||||
|
});
|
||||||
|
|
||||||
|
// Check and report any collisions in the directory
|
||||||
|
phmap::flat_hash_map<std::string_view, folly::small_vector<uint32_t, 1>>
|
||||||
|
collisions;
|
||||||
|
collisions.reserve(range.size());
|
||||||
|
for (size_t i = 0; i < names.size(); ++i) {
|
||||||
|
collisions[names[i]].push_back(i);
|
||||||
|
}
|
||||||
|
for (auto& [name, indices] : collisions) {
|
||||||
|
if (indices.size() > 1) {
|
||||||
|
LOG_WARN << fmt::format(
|
||||||
|
"case-insensitive collision in directory \"{}\" (inode={}): {}",
|
||||||
|
dir.self_entry_view().unix_path(), inode,
|
||||||
|
fmt::join(indices | ranges::views::transform([&](auto i) {
|
||||||
|
return dir_entry_view_impl::name(range[i], global_);
|
||||||
|
}),
|
||||||
|
", "));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// It's faster to check here if the folded names are sorted than to
|
||||||
|
// check later if the indices in `entries` are sorted.
|
||||||
|
if (!std::is_sorted(names.begin(), names.end())) {
|
||||||
|
std::vector<uint32_t> entries(range.size());
|
||||||
|
std::iota(entries.begin(), entries.end(), 0);
|
||||||
|
boost::sort::flat_stable_sort(
|
||||||
|
entries.begin(), entries.end(),
|
||||||
|
[&](auto a, auto b) { return names[a] < names[b]; });
|
||||||
|
auto& pv = cache[inode];
|
||||||
|
pv.reset(std::bit_width(entries.size()), entries.size());
|
||||||
|
for (size_t i = 0; i < entries.size(); ++i) {
|
||||||
|
pv.set(i, entries[i]);
|
||||||
|
}
|
||||||
|
++num_cached_dirs;
|
||||||
|
num_cached_files += entries.size();
|
||||||
|
total_cache_size += pv.size_in_bytes();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ti << "built case-insensitive directory cache for " << num_cached_files
|
||||||
|
<< " entries in " << num_cached_dirs << " out of "
|
||||||
|
<< meta_.directories().size() - 1 << " directories ("
|
||||||
|
<< size_with_unit(total_cache_size +
|
||||||
|
sizeof(decltype(cache)::value_type) * cache.size())
|
||||||
|
<< ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
return cache;
|
||||||
|
}
|
||||||
|
|
||||||
size_t total_file_entries() const {
|
size_t total_file_entries() const {
|
||||||
return (dev_inode_offset_ - file_inode_offset_) +
|
return (dev_inode_offset_ - file_inode_offset_) +
|
||||||
(meta_.dir_entries()
|
(meta_.dir_entries()
|
||||||
@ -943,6 +1030,7 @@ class metadata_ final : public metadata_v2::impl {
|
|||||||
const int unique_files_;
|
const int unique_files_;
|
||||||
const metadata_options options_;
|
const metadata_options options_;
|
||||||
const string_table symlinks_;
|
const string_table symlinks_;
|
||||||
|
std::vector<packed_int_vector<uint32_t>> const dir_icase_cache_;
|
||||||
PERFMON_CLS_PROXY_DECL
|
PERFMON_CLS_PROXY_DECL
|
||||||
PERFMON_CLS_TIMER_DECL(find)
|
PERFMON_CLS_TIMER_DECL(find)
|
||||||
PERFMON_CLS_TIMER_DECL(getattr)
|
PERFMON_CLS_TIMER_DECL(getattr)
|
||||||
@ -1687,6 +1775,31 @@ void metadata_<LoggerPolicy>::walk_data_order_impl(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
std::optional<dir_entry_view>
|
||||||
|
metadata_<LoggerPolicy>::find_impl(directory_view dir, auto const& range,
|
||||||
|
auto const& name, auto const& index_map,
|
||||||
|
auto const& entry_name_transform) const {
|
||||||
|
auto entry_name = [&](auto ix) {
|
||||||
|
return entry_name_transform(dir_entry_view_impl::name(ix, global_));
|
||||||
|
};
|
||||||
|
|
||||||
|
auto it = std::lower_bound(range.begin(), range.end(), name,
|
||||||
|
[&](auto ix, auto const& name) {
|
||||||
|
return entry_name(index_map(ix)) < name;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (it != range.end()) {
|
||||||
|
auto ix = index_map(*it);
|
||||||
|
if (entry_name(ix) == name) {
|
||||||
|
return dir_entry_view{dir_entry_view_impl::from_dir_entry_index_shared(
|
||||||
|
ix, global_.self_dir_entry(dir.inode()), global_)};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
std::optional<dir_entry_view>
|
std::optional<dir_entry_view>
|
||||||
metadata_<LoggerPolicy>::find(directory_view dir, std::string_view name) const {
|
metadata_<LoggerPolicy>::find(directory_view dir, std::string_view name) const {
|
||||||
@ -1694,19 +1807,21 @@ metadata_<LoggerPolicy>::find(directory_view dir, std::string_view name) const {
|
|||||||
|
|
||||||
auto range = dir.entry_range();
|
auto range = dir.entry_range();
|
||||||
|
|
||||||
auto it = std::lower_bound(
|
if (!options_.case_insensitive_lookup) {
|
||||||
range.begin(), range.end(), name, [&](auto ix, std::string_view name) {
|
return find_impl(dir, range, name, std::identity{}, std::identity{});
|
||||||
return internal::dir_entry_view_impl::name(ix, global_) < name;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (it != range.end()) {
|
|
||||||
if (internal::dir_entry_view_impl::name(*it, global_) == name) {
|
|
||||||
return dir_entry_view{dir_entry_view_impl::from_dir_entry_index_shared(
|
|
||||||
*it, global_.self_dir_entry(dir.inode()), global_)};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::nullopt;
|
auto const& cache = dir_icase_cache_[dir.inode()];
|
||||||
|
|
||||||
|
return find_impl(
|
||||||
|
dir, boost::irange(range.size()), utf8_case_fold(name),
|
||||||
|
[&cache, &range](auto ix) {
|
||||||
|
if (!cache.empty()) {
|
||||||
|
ix = cache[ix];
|
||||||
|
}
|
||||||
|
return range[ix];
|
||||||
|
},
|
||||||
|
utf8_case_fold_unchecked);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <gmock/gmock.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
// This needs to be included *after* gtest.h
|
// This needs to be included *after* gtest.h
|
||||||
@ -2054,3 +2055,183 @@ TEST(filesystem, multi_image) {
|
|||||||
EXPECT_EQ("baz", fs.read_string(fs.open(baz->inode())));
|
EXPECT_EQ("baz", fs.read_string(fs.open(baz->inode())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(filesystem, case_insensitive_lookup) {
|
||||||
|
auto input = std::make_shared<test::os_access_mock>();
|
||||||
|
|
||||||
|
input->add_dir("");
|
||||||
|
input->add_dir(u8"hEllÖwÖrLD");
|
||||||
|
input->add_dir(u8"FÜñKÿStrÍñg");
|
||||||
|
input->add_dir(u8"unícødérøcks");
|
||||||
|
input->add_dir(u8"JÄLAPEÑOPEPPÉR");
|
||||||
|
input->add_dir(u8"SpIcYsÜsHiRoLL");
|
||||||
|
input->add_dir(u8"CAFÉMØCHAlatte");
|
||||||
|
input->add_dir(u8"ČhàŧGƤŦ");
|
||||||
|
input->add_dir(u8"lõREMÏpSüM");
|
||||||
|
input->add_dir(u8"ŠåmpŁËŠTrInG");
|
||||||
|
input->add_dir(u8"pythonprogramming");
|
||||||
|
input->add_dir(u8"DÃTâScïÊNcË");
|
||||||
|
input->add_dir(u8"AIISFÛTÛRË");
|
||||||
|
input->add_dir(u8"readability");
|
||||||
|
input->add_file(u8"TëStCãSeSçÉNâRïÖ", "testcasescenario");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/ÆSTHETÎCcøding", "aestheticcoding");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/smîLëyFÀÇë😊", "smileyface");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/NØRTHèast", "northeast");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/SPACEadventure", "spaceadventure");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/cõMPLEXïTy🚀", "complexity");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/thisisatest", "thisisatest");
|
||||||
|
input->add_file(u8"lõREMÏpSüM/thisISaTEST", "thisisatest");
|
||||||
|
|
||||||
|
std::vector<std::u8string> case_sensitive_dirs{
|
||||||
|
u8"/hEllÖwÖrLD", u8"/FÜñKÿStrÍñg", u8"/unícødérøcks",
|
||||||
|
u8"/JÄLAPEÑOPEPPÉR", u8"/SpIcYsÜsHiRoLL", u8"/CAFÉMØCHAlatte",
|
||||||
|
u8"/ČhàŧGƤŦ", u8"/lõREMÏpSüM", u8"/ŠåmpŁËŠTrInG",
|
||||||
|
u8"/pythonprogramming", u8"/DÃTâScïÊNcË", u8"/AIISFÛTÛRË",
|
||||||
|
u8"/readability",
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<std::pair<std::u8string, std::string>> case_sensitive_files{
|
||||||
|
{u8"/TëStCãSeSçÉNâRïÖ", "testcasescenario"},
|
||||||
|
{u8"/lõREMÏpSüM/ÆSTHETÎCcøding", "aestheticcoding"},
|
||||||
|
{u8"/lõREMÏpSüM/smîLëyFÀÇë😊", "smileyface"},
|
||||||
|
{u8"/lõREMÏpSüM/NØRTHèast", "northeast"},
|
||||||
|
{u8"/lõREMÏpSüM/SPACEadventure", "spaceadventure"},
|
||||||
|
{u8"/lõREMÏpSüM/cõMPLEXïTy🚀", "complexity"},
|
||||||
|
{u8"/lõREMÏpSüM/thisisatest", "thisisatest"},
|
||||||
|
{u8"/lõREMÏpSüM/thisISaTEST", "thisisatest"},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<std::u8string> case_insensitive_dirs{
|
||||||
|
u8"/HELlÖwÖRLD", u8"/FÜÑKÿSTríÑg", u8"/uNÍcødéRøcks",
|
||||||
|
u8"/JÄLApeñOPePPÉR", u8"/SpiCysÜshiRoLL", u8"/CAféMØchAlatte",
|
||||||
|
u8"/čhàŧgƥŧ", u8"/lõremÏpsüM", u8"/šåmpŁëšTrInG",
|
||||||
|
u8"/pyTHonproGRamming", u8"/DãtÂScïêNcË", u8"/AiisFÛTÛRË",
|
||||||
|
u8"/reADabiLIty",
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<std::pair<std::u8string, std::string>> case_insensitive_files{
|
||||||
|
{u8"/TësTcãSeSçéNâRïÖ", "testcasescenario"},
|
||||||
|
{u8"/lõRemïpSüM/ÆstHETÎCcØDing", "aestheticcoding"},
|
||||||
|
{u8"/lõremïPSüM/smîlËYfàÇë😊", "smileyface"},
|
||||||
|
{u8"/lõREMÏPsÜM/NØRthÈAst", "northeast"},
|
||||||
|
{u8"/lõRemïPsüM/SPACEadvENTure", "spaceadventure"},
|
||||||
|
{u8"/LÕREMÏpSüM/CõMPlexïTy🚀", "complexity"},
|
||||||
|
{u8"/lõrEMÏpSüM/thiSISatest", "thisisatest"},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<std::u8string> non_matching_entries{
|
||||||
|
u8"/HELlÖwÖRLDx",
|
||||||
|
u8"/FÜÑKÿSTríÑj",
|
||||||
|
u8"/uNÍcødéRcks",
|
||||||
|
u8"/JÄLApeñOPePPÉ",
|
||||||
|
u8"/SpiCysÜshiRoLLx",
|
||||||
|
u8"/CAféMØchAltte",
|
||||||
|
u8"/čhàŧgƥŧx",
|
||||||
|
u8"/lõremÏpsü",
|
||||||
|
u8"/šåmpŁëšTrnG",
|
||||||
|
u8"/pyTHonproGRammin",
|
||||||
|
u8"/DãtÂScïêNcËx",
|
||||||
|
u8"/AiisFÛTÛTË",
|
||||||
|
u8"/reADabiLItx",
|
||||||
|
u8"/TësRcãSeSçéNâRïÖ",
|
||||||
|
u8"/lõRemïpüM/ÆstHETÎCcØDing",
|
||||||
|
u8"/lõremïPSüM/mîlËYfàÇë😊",
|
||||||
|
u8"/lõRMÏPsÜM/NØRthÈAst",
|
||||||
|
u8"/lõRemïPsüM/SPACEadvENTurex",
|
||||||
|
u8"/LÕREMÏpSüM/CõMPexïTy🚀",
|
||||||
|
u8"/lõrEMÏpSüM/thiSISatesy",
|
||||||
|
};
|
||||||
|
|
||||||
|
test::test_logger lgr;
|
||||||
|
auto fsimage = build_dwarfs(lgr, input, "null");
|
||||||
|
|
||||||
|
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
|
||||||
|
|
||||||
|
lgr.clear();
|
||||||
|
|
||||||
|
{
|
||||||
|
reader::filesystem_v2 fs(lgr, *input, mm,
|
||||||
|
{.metadata = {.case_insensitive_lookup = false}});
|
||||||
|
|
||||||
|
EXPECT_TRUE(lgr.empty());
|
||||||
|
|
||||||
|
for (auto const& dir : case_sensitive_dirs) {
|
||||||
|
auto name = u8string_to_string(dir);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_TRUE(dev) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& [file, content] : case_sensitive_files) {
|
||||||
|
auto name = u8string_to_string(file);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_TRUE(dev) << name;
|
||||||
|
EXPECT_EQ(content, fs.read_string(fs.open(dev->inode()))) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& dir : case_insensitive_dirs) {
|
||||||
|
auto name = u8string_to_string(dir);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_FALSE(dev) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& [file, content] : case_insensitive_files) {
|
||||||
|
auto name = u8string_to_string(file);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_FALSE(dev) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& ent : non_matching_entries) {
|
||||||
|
auto name = u8string_to_string(ent);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_FALSE(dev) << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lgr.clear();
|
||||||
|
|
||||||
|
{
|
||||||
|
reader::filesystem_v2 fs(lgr, *input, mm,
|
||||||
|
{.metadata = {.case_insensitive_lookup = true}});
|
||||||
|
|
||||||
|
EXPECT_THAT(
|
||||||
|
lgr.get_log(),
|
||||||
|
testing::Contains(testing::ResultOf(
|
||||||
|
[](const auto& entry) { return entry.output; },
|
||||||
|
testing::AllOf(testing::HasSubstr(u8string_to_string(
|
||||||
|
u8"case-insensitive collision in directory "
|
||||||
|
u8"\"lõREMÏpSüM\" (inode=")),
|
||||||
|
testing::HasSubstr("thisISaTEST, thisisatest")))));
|
||||||
|
|
||||||
|
for (auto const& dir : case_sensitive_dirs) {
|
||||||
|
auto name = u8string_to_string(dir);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_TRUE(dev) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& [file, content] : case_sensitive_files) {
|
||||||
|
auto name = u8string_to_string(file);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_TRUE(dev) << name;
|
||||||
|
EXPECT_EQ(content, fs.read_string(fs.open(dev->inode()))) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& dir : case_insensitive_dirs) {
|
||||||
|
auto name = u8string_to_string(dir);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_TRUE(dev) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& [file, content] : case_insensitive_files) {
|
||||||
|
auto name = u8string_to_string(file);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_TRUE(dev) << name;
|
||||||
|
EXPECT_EQ(content, fs.read_string(fs.open(dev->inode()))) << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& ent : non_matching_entries) {
|
||||||
|
auto name = u8string_to_string(ent);
|
||||||
|
auto dev = fs.find(name);
|
||||||
|
EXPECT_FALSE(dev) << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1053,6 +1053,7 @@ TEST_P(tools_test, end_to_end) {
|
|||||||
|
|
||||||
std::vector<std::string> all_options{
|
std::vector<std::string> all_options{
|
||||||
"-s",
|
"-s",
|
||||||
|
"-ocase_insensitive",
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
"-oenable_nlink",
|
"-oenable_nlink",
|
||||||
"-oreadonly",
|
"-oreadonly",
|
||||||
@ -1074,6 +1075,7 @@ TEST_P(tools_test, end_to_end) {
|
|||||||
|
|
||||||
for (unsigned bitmask = 0; bitmask < combinations; ++bitmask) {
|
for (unsigned bitmask = 0; bitmask < combinations; ++bitmask) {
|
||||||
std::vector<std::string> args;
|
std::vector<std::string> args;
|
||||||
|
bool case_insensitive{false};
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
bool enable_nlink{false};
|
bool enable_nlink{false};
|
||||||
bool readonly{false};
|
bool readonly{false};
|
||||||
@ -1083,6 +1085,9 @@ TEST_P(tools_test, end_to_end) {
|
|||||||
for (size_t i = 0; i < all_options.size(); ++i) {
|
for (size_t i = 0; i < all_options.size(); ++i) {
|
||||||
if ((1 << i) & bitmask) {
|
if ((1 << i) & bitmask) {
|
||||||
auto const& opt = all_options[i];
|
auto const& opt = all_options[i];
|
||||||
|
if (opt == "-ocase_insensitive") {
|
||||||
|
case_insensitive = true;
|
||||||
|
}
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
if (opt == "-oreadonly") {
|
if (opt == "-oreadonly") {
|
||||||
readonly = true;
|
readonly = true;
|
||||||
@ -1139,6 +1144,12 @@ TEST_P(tools_test, end_to_end) {
|
|||||||
EXPECT_EQ(st.st_gid, 3456) << runner.cmdline();
|
EXPECT_EQ(st.st_gid, 3456) << runner.cmdline();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
EXPECT_TRUE(fs::exists(mountpoint / "format.sh")) << runner.cmdline();
|
||||||
|
EXPECT_EQ(case_insensitive, fs::exists(mountpoint / "FORMAT.SH"))
|
||||||
|
<< runner.cmdline();
|
||||||
|
EXPECT_EQ(case_insensitive, fs::exists(mountpoint / "fOrMaT.Sh"))
|
||||||
|
<< runner.cmdline();
|
||||||
|
|
||||||
auto perfmon =
|
auto perfmon =
|
||||||
dwarfs::getxattr(mountpoint, "user.dwarfs.driver.perfmon");
|
dwarfs::getxattr(mountpoint, "user.dwarfs.driver.perfmon");
|
||||||
#if DWARFS_PERFMON_ENABLED
|
#if DWARFS_PERFMON_ENABLED
|
||||||
|
111
test/unicode_test.cpp
Normal file
111
test/unicode_test.cpp
Normal file
File diff suppressed because one or more lines are too long
@ -179,6 +179,7 @@ struct options {
|
|||||||
#endif
|
#endif
|
||||||
int enable_nlink{0};
|
int enable_nlink{0};
|
||||||
int readonly{0};
|
int readonly{0};
|
||||||
|
int case_insensitive{0};
|
||||||
int cache_image{0};
|
int cache_image{0};
|
||||||
int cache_files{0};
|
int cache_files{0};
|
||||||
size_t cachesize{0};
|
size_t cachesize{0};
|
||||||
@ -258,6 +259,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
|
|||||||
DWARFS_OPT("seq_detector=%s", seq_detector_thresh_str, 0),
|
DWARFS_OPT("seq_detector=%s", seq_detector_thresh_str, 0),
|
||||||
DWARFS_OPT("enable_nlink", enable_nlink, 1),
|
DWARFS_OPT("enable_nlink", enable_nlink, 1),
|
||||||
DWARFS_OPT("readonly", readonly, 1),
|
DWARFS_OPT("readonly", readonly, 1),
|
||||||
|
DWARFS_OPT("case_insensitive", case_insensitive, 1),
|
||||||
DWARFS_OPT("cache_image", cache_image, 1),
|
DWARFS_OPT("cache_image", cache_image, 1),
|
||||||
DWARFS_OPT("no_cache_image", cache_image, 0),
|
DWARFS_OPT("no_cache_image", cache_image, 0),
|
||||||
DWARFS_OPT("cache_files", cache_files, 1),
|
DWARFS_OPT("cache_files", cache_files, 1),
|
||||||
@ -1224,6 +1226,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) {
|
|||||||
<< " -o imagesize=NUM filesystem image size in bytes\n"
|
<< " -o imagesize=NUM filesystem image size in bytes\n"
|
||||||
<< " -o enable_nlink show correct hardlink numbers\n"
|
<< " -o enable_nlink show correct hardlink numbers\n"
|
||||||
<< " -o readonly show read-only file system\n"
|
<< " -o readonly show read-only file system\n"
|
||||||
|
<< " -o case_insensitive perform case-insensitive lookups\n"
|
||||||
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
|
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
|
||||||
<< " -o (no_)cache_files (don't) keep files in kernel cache\n"
|
<< " -o (no_)cache_files (don't) keep files in kernel cache\n"
|
||||||
<< " -o debuglevel=NAME " << logger::all_level_names() << "\n"
|
<< " -o debuglevel=NAME " << logger::all_level_names() << "\n"
|
||||||
@ -1464,6 +1467,7 @@ void load_filesystem(dwarfs_userdata& userdata) {
|
|||||||
fsopts.inode_reader.readahead = opts.readahead;
|
fsopts.inode_reader.readahead = opts.readahead;
|
||||||
fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
|
fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
|
||||||
fsopts.metadata.readonly = bool(opts.readonly);
|
fsopts.metadata.readonly = bool(opts.readonly);
|
||||||
|
fsopts.metadata.case_insensitive_lookup = bool(opts.case_insensitive);
|
||||||
fsopts.metadata.block_size = opts.blocksize;
|
fsopts.metadata.block_size = opts.blocksize;
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
fsopts.metadata.fs_uid = opts.fs_uid;
|
fsopts.metadata.fs_uid = opts.fs_uid;
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
"boost-multi-index",
|
"boost-multi-index",
|
||||||
"boost-process",
|
"boost-process",
|
||||||
"boost-program-options",
|
"boost-program-options",
|
||||||
|
"boost-sort",
|
||||||
"boost-thread",
|
"boost-thread",
|
||||||
"boost-uuid",
|
"boost-uuid",
|
||||||
"boost-variant",
|
"boost-variant",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user