mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-05 10:46:13 -04:00
feat: support extracting only files matching glob patterns (fixes gh #243)
This commit is contained in:
parent
853d8ca966
commit
2ff7602192
@ -44,6 +44,17 @@ to disk:
|
|||||||
case the default is to extract the files to the current directory, or
|
case the default is to extract the files to the current directory, or
|
||||||
to write the archive data to stdout.
|
to write the archive data to stdout.
|
||||||
|
|
||||||
|
- `--pattern=`*glob-pattern*:
|
||||||
|
If specified, only extract entries matching the pattern. Can be specified
|
||||||
|
multiple times, in which case all files matching one or more patterns will
|
||||||
|
be extracted. Patterns can also be given as plain positional arguments, without `--pattern`.
|
||||||
|
If prefixed with `i:`, the glob pattern match will be case-insensitive,
|
||||||
|
e.g. `i:**/*.txt` would match all `.txt`, `.TXT` and `.tXt` files. If you
|
||||||
|
want the *actual* pattern to start with `i:`, you have to prefix the
|
||||||
|
pattern with `:`, i.e. `:i:...`. The glob patterns support the wildcards
|
||||||
|
`*` and `?`, character classes (`[avt]`), ranges (`[a-h]`), complementation
|
||||||
|
(`[!a-h]`) and globstar (`**`).
|
||||||
|
|
||||||
- `-O`, `--image-offset=`*value*|`auto`:
|
- `-O`, `--image-offset=`*value*|`auto`:
|
||||||
Specify the byte offset at which the filesystem is located in the image.
|
Specify the byte offset at which the filesystem is located in the image.
|
||||||
Use `auto` to detect the offset automatically. This is also the default.
|
Use `auto` to detect the offset automatically. This is also the default.
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
|
class glob_matcher;
|
||||||
class library_dependencies;
|
class library_dependencies;
|
||||||
class logger;
|
class logger;
|
||||||
class os_access;
|
class os_access;
|
||||||
@ -72,7 +73,13 @@ class filesystem_extractor {
|
|||||||
bool extract(reader::filesystem_v2 const& fs,
|
bool extract(reader::filesystem_v2 const& fs,
|
||||||
filesystem_extractor_options const& opts =
|
filesystem_extractor_options const& opts =
|
||||||
filesystem_extractor_options()) {
|
filesystem_extractor_options()) {
|
||||||
return impl_->extract(fs, opts);
|
return impl_->extract(fs, nullptr, opts);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract entries from `fs`, forwarding `matcher` and `opts` unchanged to the
// implementation object. `matcher` may be null: the overload without a
// matcher parameter forwards nullptr, and the implementation only filters
// entries when the pointer is non-null.
bool extract(reader::filesystem_v2 const& fs, glob_matcher const* matcher,
|
||||||
|
filesystem_extractor_options const& opts =
|
||||||
|
filesystem_extractor_options()) {
|
||||||
|
return impl_->extract(fs, matcher, opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
@ -84,8 +91,9 @@ class filesystem_extractor {
|
|||||||
virtual void open_stream(std::ostream& os, std::string const& format) = 0;
|
virtual void open_stream(std::ostream& os, std::string const& format) = 0;
|
||||||
virtual void open_disk(std::filesystem::path const& output) = 0;
|
virtual void open_disk(std::filesystem::path const& output) = 0;
|
||||||
virtual void close() = 0;
|
virtual void close() = 0;
|
||||||
virtual bool extract(reader::filesystem_v2 const& fs,
|
virtual bool
|
||||||
filesystem_extractor_options const& opts) = 0;
|
extract(reader::filesystem_v2 const& fs, glob_matcher const* matcher,
|
||||||
|
filesystem_extractor_options const& opts) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
// This is required to avoid Windows.h being pulled in by libarchive
|
// This is required to avoid Windows.h being pulled in by libarchive
|
||||||
// and polluting our environment with all sorts of shit.
|
// and polluting our environment with all sorts of shit.
|
||||||
@ -41,6 +42,7 @@
|
|||||||
|
|
||||||
#include <dwarfs/file_stat.h>
|
#include <dwarfs/file_stat.h>
|
||||||
#include <dwarfs/fstypes.h>
|
#include <dwarfs/fstypes.h>
|
||||||
|
#include <dwarfs/glob_matcher.h>
|
||||||
#include <dwarfs/library_dependencies.h>
|
#include <dwarfs/library_dependencies.h>
|
||||||
#include <dwarfs/logger.h>
|
#include <dwarfs/logger.h>
|
||||||
#include <dwarfs/os_access.h>
|
#include <dwarfs/os_access.h>
|
||||||
@ -189,7 +191,7 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool extract(reader::filesystem_v2 const& fs,
|
bool extract(reader::filesystem_v2 const& fs, glob_matcher const* matcher,
|
||||||
filesystem_extractor_options const& opts) override;
|
filesystem_extractor_options const& opts) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -249,7 +251,8 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
|||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
bool filesystem_extractor_<LoggerPolicy>::extract(
|
bool filesystem_extractor_<LoggerPolicy>::extract(
|
||||||
reader::filesystem_v2 const& fs, filesystem_extractor_options const& opts) {
|
reader::filesystem_v2 const& fs, glob_matcher const* matcher,
|
||||||
|
filesystem_extractor_options const& opts) {
|
||||||
DWARFS_CHECK(a_, "filesystem not opened");
|
DWARFS_CHECK(a_, "filesystem not opened");
|
||||||
|
|
||||||
auto lr = ::archive_entry_linkresolver_new();
|
auto lr = ::archive_entry_linkresolver_new();
|
||||||
@ -351,6 +354,23 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::unordered_set<std::filesystem::path> matched_dirs;
|
||||||
|
|
||||||
|
if (matcher) {
|
||||||
|
fs.walk([&](auto entry) {
|
||||||
|
if (!entry.inode().is_directory()) {
|
||||||
|
if (matcher->match(entry.unix_path())) {
|
||||||
|
while (auto parent = entry.parent()) {
|
||||||
|
if (!matched_dirs.insert(parent->fs_path()).second) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
entry = *parent;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
fs.walk_data_order([&](auto entry) {
|
fs.walk_data_order([&](auto entry) {
|
||||||
// TODO: we can surely early abort walk() somehow
|
// TODO: we can surely early abort walk() somehow
|
||||||
if (entry.is_root() || hard_error) {
|
if (entry.is_root() || hard_error) {
|
||||||
@ -359,6 +379,23 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
|
|||||||
|
|
||||||
auto inode = entry.inode();
|
auto inode = entry.inode();
|
||||||
|
|
||||||
|
if (matcher) {
|
||||||
|
LOG_TRACE << "checking " << entry.unix_path();
|
||||||
|
if (inode.is_directory()) {
|
||||||
|
if (!matched_dirs.contains(entry.fs_path())) {
|
||||||
|
LOG_TRACE << "skipping directory " << entry.fs_path();
|
||||||
|
// no need to extract this directory
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!matcher->match(entry.unix_path())) {
|
||||||
|
LOG_TRACE << "skipping " << entry.fs_path();
|
||||||
|
// no match, skip this entry
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto ae = ::archive_entry_new();
|
auto ae = ::archive_entry_new();
|
||||||
auto stbuf = fs.getattr(inode);
|
auto stbuf = fs.getattr(inode);
|
||||||
|
|
||||||
|
@ -2000,6 +2000,37 @@ TEST(dwarfsextract_test, mtree) {
|
|||||||
EXPECT_THAT(out, ::testing::HasSubstr("type=file"));
|
EXPECT_THAT(out, ::testing::HasSubstr("type=file"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an image from the standard test file tree, then extracts it as an
// mtree listing while passing two glob patterns as positional arguments.
// Only entries matching at least one pattern, plus the parent directories
// leading to them, are expected in the output.
TEST(dwarfsextract_test, patterns) {
  auto mkdt = mkdwarfs_tester::create_empty();
  mkdt.add_test_file_tree();

  // Compare the exit code itself (not `run(...) != 0`) so that a failure
  // reports the actual exit code, matching the dwarfsextract assertion below.
  ASSERT_EQ(0, mkdt.run({"-i", "/", "-o", "-", "--with-devices"}))
      << mkdt.err();

  auto t = dwarfsextract_tester::create_with_image(mkdt.out());
  ASSERT_EQ(0, t.run({"-i", "image.dwarfs", "-f", "mtree", "**/*.enc",
                      "{dev,etc,lib,var}/[m-ot-z]*"}))
      << t.err();

  auto out = t.out();
  EXPECT_TRUE(out.starts_with("#mtree")) << out;

  // Matched files together with every directory on the path to them.
  std::vector<std::string> const expected{
      "./dev",
      "./dev/tty37",
      "./etc",
      "./etc/netconfig",
      "./usr",
      "./usr/lib64",
      "./usr/lib64/tcl8.6",
      "./usr/lib64/tcl8.6/encoding",
      "./usr/lib64/tcl8.6/encoding/cp950.enc",
      "./usr/lib64/tcl8.6/encoding/iso8859-8.enc",
  };

  auto mtree = test::parse_mtree(out);

  // Collect just the entry paths, in the order the parser returns them.
  std::vector<std::string> actual;
  for (auto const& entry : mtree) {
    actual.push_back(entry.first);
  }

  EXPECT_EQ(expected, actual);
}
|
||||||
|
|
||||||
TEST(dwarfsextract_test, stdout_progress_error) {
|
TEST(dwarfsextract_test, stdout_progress_error) {
|
||||||
auto t = dwarfsextract_tester::create_with_image();
|
auto t = dwarfsextract_tester::create_with_image();
|
||||||
EXPECT_NE(0,
|
EXPECT_NE(0,
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
|
|
||||||
#include <dwarfs/config.h>
|
#include <dwarfs/config.h>
|
||||||
|
#include <dwarfs/glob_matcher.h>
|
||||||
#include <dwarfs/library_dependencies.h>
|
#include <dwarfs/library_dependencies.h>
|
||||||
#include <dwarfs/logger.h>
|
#include <dwarfs/logger.h>
|
||||||
#include <dwarfs/mmap.h>
|
#include <dwarfs/mmap.h>
|
||||||
@ -77,6 +78,9 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
("output,o",
|
("output,o",
|
||||||
po_sys_value<sys_string>(&output),
|
po_sys_value<sys_string>(&output),
|
||||||
"output file or directory")
|
"output file or directory")
|
||||||
|
("pattern",
|
||||||
|
po::value<std::vector<std::string>>(),
|
||||||
|
"only extract files matching these patterns")
|
||||||
("image-offset,O",
|
("image-offset,O",
|
||||||
po::value<std::string>(&image_offset)->default_value("auto"),
|
po::value<std::string>(&image_offset)->default_value("auto"),
|
||||||
"filesystem image offset in bytes")
|
"filesystem image offset in bytes")
|
||||||
@ -111,10 +115,17 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
|
|
||||||
tool::add_common_options(opts, logopts);
|
tool::add_common_options(opts, logopts);
|
||||||
|
|
||||||
|
po::positional_options_description pos;
|
||||||
|
pos.add("pattern", -1);
|
||||||
|
|
||||||
po::variables_map vm;
|
po::variables_map vm;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
po::store(po::parse_command_line(argc, argv, opts), vm);
|
po::store(po::basic_command_line_parser<sys_char>(argc, argv)
|
||||||
|
.options(opts)
|
||||||
|
.positional(pos)
|
||||||
|
.run(),
|
||||||
|
vm);
|
||||||
po::notify(vm);
|
po::notify(vm);
|
||||||
} catch (po::error const& e) {
|
} catch (po::error const& e) {
|
||||||
iol.err << "error: " << e.what() << "\n";
|
iol.err << "error: " << e.what() << "\n";
|
||||||
@ -141,6 +152,13 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<glob_matcher> matcher;
|
||||||
|
|
||||||
|
if (vm.count("pattern")) {
|
||||||
|
matcher = std::make_unique<glob_matcher>(
|
||||||
|
vm["pattern"].as<std::vector<std::string>>());
|
||||||
|
}
|
||||||
|
|
||||||
int rv = 0;
|
int rv = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -214,7 +232,7 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
rv = fsx.extract(fs, fsx_opts) ? 0 : 2;
|
rv = fsx.extract(fs, matcher.get(), fsx_opts) ? 0 : 2;
|
||||||
|
|
||||||
fsx.close();
|
fsx.close();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user