Allow specifying a filesystem image offset (fixes github #38)

Works for dwarfs, dwarfsck and dwarfsextract
This commit is contained in:
Marcus Holland-Moritz 2021-03-28 00:27:06 +01:00
parent b4a8e0134b
commit aa92e69d10
6 changed files with 145 additions and 31 deletions

View File

@ -65,7 +65,7 @@ class filesystem_v2 {
static int identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os, static int identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
int detail_level = 0, size_t num_readers = 1, int detail_level = 0, size_t num_readers = 1,
bool check_integrity = false); bool check_integrity = false, off_t image_offset = 0);
void dump(std::ostream& os, int detail_level) const { void dump(std::ostream& os, int detail_level) const {
impl_->dump(os, detail_level); impl_->dump(os, detail_level);

View File

@ -25,6 +25,8 @@
#include <iosfwd> #include <iosfwd>
#include <optional> #include <optional>
#include <sys/types.h>
namespace dwarfs { namespace dwarfs {
enum class mlock_mode { NONE, TRY, MUST }; enum class mlock_mode { NONE, TRY, MUST };
@ -42,7 +44,10 @@ struct metadata_options {
}; };
struct filesystem_options { struct filesystem_options {
static constexpr off_t IMAGE_OFFSET_AUTO{-1};
mlock_mode lock_mode{mlock_mode::NONE}; mlock_mode lock_mode{mlock_mode::NONE};
off_t image_offset{0};
block_cache_options block_cache; block_cache_options block_cache;
metadata_options metadata; metadata_options metadata;
}; };

View File

@ -59,6 +59,7 @@ struct options {
const char* workers_str; // TODO: const?? -> use string? const char* workers_str; // TODO: const?? -> use string?
const char* mlock_str; // TODO: const?? -> use string? const char* mlock_str; // TODO: const?? -> use string?
const char* decompress_ratio_str; // TODO: const?? -> use string? const char* decompress_ratio_str; // TODO: const?? -> use string?
const char* image_offset_str; // TODO: const?? -> use string?
int enable_nlink; int enable_nlink;
int readonly; int readonly;
int cache_image; int cache_image;
@ -82,6 +83,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
DWARFS_OPT("workers=%s", workers_str, 0), DWARFS_OPT("workers=%s", workers_str, 0),
DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("mlock=%s", mlock_str, 0),
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
DWARFS_OPT("offset=%s", image_offset_str, 0),
DWARFS_OPT("enable_nlink", enable_nlink, 1), DWARFS_OPT("enable_nlink", enable_nlink, 1),
DWARFS_OPT("readonly", readonly, 1), DWARFS_OPT("readonly", readonly, 1),
DWARFS_OPT("cache_image", cache_image, 1), DWARFS_OPT("cache_image", cache_image, 1),
@ -112,6 +114,19 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
fsopts.block_cache.mm_release = !s_opts.cache_image; fsopts.block_cache.mm_release = !s_opts.cache_image;
fsopts.metadata.enable_nlink = bool(s_opts.enable_nlink); fsopts.metadata.enable_nlink = bool(s_opts.enable_nlink);
fsopts.metadata.readonly = bool(s_opts.readonly); fsopts.metadata.readonly = bool(s_opts.readonly);
if (s_opts.image_offset_str) {
std::string image_offset{s_opts.image_offset_str};
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO
: folly::to<off_t>(image_offset);
} catch (...) {
DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
}
}
s_fs = std::make_shared<filesystem_v2>( s_fs = std::make_shared<filesystem_v2>(
s_lgr, std::make_shared<mmap>(s_opts.fsimage), fsopts, FUSE_ROOT_ID); s_lgr, std::make_shared<mmap>(s_opts.fsimage), fsopts, FUSE_ROOT_ID);
@ -426,20 +441,22 @@ void op_statfs(fuse_req_t req, fuse_ino_t /*ino*/) {
} }
void usage(const char* progname) { void usage(const char* progname) {
std::cerr << "dwarfs (" << PRJ_GIT_ID << ", fuse version " << FUSE_USE_VERSION std::cerr
<< ")\n\n" << "dwarfs (" << PRJ_GIT_ID << ", fuse version " << FUSE_USE_VERSION
<< "usage: " << progname << " image mountpoint [options]\n\n" << ")\n\n"
<< "DWARFS options:\n" << "usage: " << progname << " image mountpoint [options]\n\n"
<< " -o cachesize=SIZE set size of block cache (512M)\n" << "DWARFS options:\n"
<< " -o workers=NUM number of worker threads (2)\n" << " -o cachesize=SIZE set size of block cache (512M)\n"
<< " -o mlock=NAME mlock mode: (none), try, must\n" << " -o workers=NUM number of worker threads (2)\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n" << " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o enable_nlink show correct hardlink numbers\n" << " -o decratio=NUM ratio for full decompression (0.8)\n"
<< " -o readonly show read-only file system\n" << " -o offset=NUM|auto filesystem image offset in bytes (0)\n"
<< " -o (no_)cache_image (don't) keep image in kernel cache\n" << " -o enable_nlink show correct hardlink numbers\n"
<< " -o (no_)cache_files (don't) keep files in kernel cache\n" << " -o readonly show read-only file system\n"
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n" << " -o (no_)cache_image (don't) keep image in kernel cache\n"
<< std::endl; << " -o (no_)cache_files (don't) keep files in kernel cache\n"
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n"
<< std::endl;
#if FUSE_USE_VERSION >= 30 #if FUSE_USE_VERSION >= 30
fuse_cmdline_help(); fuse_cmdline_help();

View File

@ -55,13 +55,71 @@ namespace {
class filesystem_parser { class filesystem_parser {
public: public:
explicit filesystem_parser(std::shared_ptr<mmif> mm) static off_t find_image_offset(mmif& mm, off_t image_offset) {
: mm_(mm) { if (image_offset != filesystem_options::IMAGE_OFFSET_AUTO) {
if (mm_->size() < sizeof(file_header)) { return image_offset;
}
static constexpr std::array<char, 7> magic{
{'D', 'W', 'A', 'R', 'F', 'S', MAJOR_VERSION}};
off_t start = 0;
for (;;) {
auto ps = mm.as<void>(start);
auto pc = ::memmem(ps, mm.size(), magic.data(), magic.size());
if (!pc) {
break;
}
off_t pos =
static_cast<uint8_t const*>(pc) - static_cast<uint8_t const*>(ps);
if (pos + sizeof(file_header) >= mm.size()) {
break;
}
auto fh = mm.as<file_header>(pos);
if (fh->minor < 2) {
// best we can do for older file systems
return pos;
}
// do a little more validation before we return
if (pos + sizeof(section_header_v2) >= mm.size()) {
break;
}
auto sh = mm.as<section_header_v2>(pos);
if (sh->number == 0) {
if (pos + 2 * sizeof(section_header_v2) + sh->length >= mm.size()) {
break;
}
ps = mm.as<void>(pos + sizeof(section_header_v2) + sh->length);
if (::memcmp(ps, magic.data(), magic.size()) == 0 and
reinterpret_cast<section_header_v2 const*>(ps)->number == 1) {
return pos;
}
}
start = pos + magic.size();
}
DWARFS_THROW(runtime_error, "no filesystem found");
}
explicit filesystem_parser(std::shared_ptr<mmif> mm, off_t image_offset = 0)
: mm_{mm}
, image_offset_{find_image_offset(*mm_, image_offset)} {
if (mm_->size() < image_offset_ + sizeof(file_header)) {
DWARFS_THROW(runtime_error, "file too small"); DWARFS_THROW(runtime_error, "file too small");
} }
auto fh = mm_->as<file_header>(); auto fh = mm_->as<file_header>(image_offset_);
if (::memcmp(&fh->magic[0], "DWARFS", 6) != 0) { if (::memcmp(&fh->magic[0], "DWARFS", 6) != 0) {
DWARFS_THROW(runtime_error, "magic not found"); DWARFS_THROW(runtime_error, "magic not found");
@ -83,7 +141,7 @@ class filesystem_parser {
} }
std::optional<fs_section> next_section() { std::optional<fs_section> next_section() {
if (offset_ < mm_->size()) { if (offset_ < static_cast<off_t>(mm_->size())) {
auto section = fs_section(*mm_, offset_, version_); auto section = fs_section(*mm_, offset_, version_);
offset_ = section.end(); offset_ = section.end();
return section; return section;
@ -92,15 +150,20 @@ class filesystem_parser {
return std::nullopt; return std::nullopt;
} }
void rewind() { offset_ = version_ == 1 ? sizeof(file_header) : 0; } void rewind() {
offset_ = image_offset_ + (version_ == 1 ? sizeof(file_header) : 0);
}
std::string version() const { std::string version() const {
return fmt::format("{0}.{1} [{2}]", major_, minor_, version_); return fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
} }
// Offset of the filesystem image within the mapped file, as stored by the
// constructor.
off_t image_offset() const {
  return image_offset_;
}
private: private:
std::shared_ptr<mmif> mm_; std::shared_ptr<mmif> mm_;
size_t offset_{0}; off_t const image_offset_;
off_t offset_{0};
int version_{0}; int version_{0};
uint8_t major_{0}; uint8_t major_{0};
uint8_t minor_{0}; uint8_t minor_{0};
@ -218,7 +281,7 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
int inode_offset) int inode_offset)
: LOG_PROXY_INIT(lgr) : LOG_PROXY_INIT(lgr)
, mm_(std::move(mm)) { , mm_(std::move(mm)) {
filesystem_parser parser(mm_); filesystem_parser parser(mm_, options.image_offset);
block_cache cache(lgr, mm_, options.block_cache); block_cache cache(lgr, mm_, options.block_cache);
section_map sections; section_map sections;
@ -475,13 +538,18 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm, int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
std::ostream& os, int detail_level, std::ostream& os, int detail_level,
size_t num_readers, bool check_integrity) { size_t num_readers, bool check_integrity,
off_t image_offset) {
// TODO: // TODO:
LOG_PROXY(debug_logger_policy, lgr); LOG_PROXY(debug_logger_policy, lgr);
filesystem_parser parser(mm); filesystem_parser parser(mm, image_offset);
if (detail_level > 0) { if (detail_level > 0) {
os << "FILESYSTEM version " << parser.version() << std::endl; os << "DwarFS version " << parser.version();
if (auto off = parser.image_offset(); off > 0) {
os << " at offset " << off;
}
os << std::endl;
} }
worker_group wg("reader", num_readers); worker_group wg("reader", num_readers);
@ -540,6 +608,7 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
if (detail_level > 0) { if (detail_level > 0) {
fsopts.metadata.enable_nlink = true; fsopts.metadata.enable_nlink = true;
} }
fsopts.image_offset = image_offset;
filesystem_v2(lgr, mm, fsopts).dump(os, detail_level); filesystem_v2(lgr, mm, fsopts).dump(os, detail_level);
} }

View File

@ -43,7 +43,7 @@ namespace po = boost::program_options;
int dwarfsck(int argc, char** argv) { int dwarfsck(int argc, char** argv) {
const size_t num_cpu = std::max(std::thread::hardware_concurrency(), 1u); const size_t num_cpu = std::max(std::thread::hardware_concurrency(), 1u);
std::string log_level, input, export_metadata; std::string log_level, input, export_metadata, image_offset;
size_t num_workers; size_t num_workers;
int detail; int detail;
bool json = false; bool json = false;
@ -58,6 +58,9 @@ int dwarfsck(int argc, char** argv) {
("detail,d", ("detail,d",
po::value<int>(&detail)->default_value(2), po::value<int>(&detail)->default_value(2),
"detail level") "detail level")
("image-offset,O",
po::value<std::string>(&image_offset)->default_value("auto"),
"filesystem image offset in bytes")
("num-workers,n", ("num-workers,n",
po::value<size_t>(&num_workers)->default_value(num_cpu), po::value<size_t>(&num_workers)->default_value(num_cpu),
"number of reader worker threads") "number of reader worker threads")
@ -101,22 +104,31 @@ int dwarfsck(int argc, char** argv) {
LOG_PROXY(debug_logger_policy, lgr); LOG_PROXY(debug_logger_policy, lgr);
try { try {
filesystem_options fsopts;
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO
: folly::to<off_t>(image_offset);
} catch (...) {
DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
}
auto mm = std::make_shared<mmap>(input); auto mm = std::make_shared<mmap>(input);
if (!export_metadata.empty()) { if (!export_metadata.empty()) {
auto of = folly::File(export_metadata, O_RDWR | O_CREAT | O_TRUNC); auto of = folly::File(export_metadata, O_RDWR | O_CREAT | O_TRUNC);
filesystem_v2 fs(lgr, mm); filesystem_v2 fs(lgr, mm, fsopts);
auto json = fs.serialize_metadata_as_json(true); auto json = fs.serialize_metadata_as_json(true);
if (folly::writeFull(of.fd(), json.data(), json.size()) < 0) { if (folly::writeFull(of.fd(), json.data(), json.size()) < 0) {
LOG_ERROR << "failed to export metadata"; LOG_ERROR << "failed to export metadata";
} }
of.close(); of.close();
} else if (json) { } else if (json) {
filesystem_v2 fs(lgr, mm); filesystem_v2 fs(lgr, mm, fsopts);
std::cout << folly::toPrettyJson(fs.metadata_as_dynamic()) << std::endl; std::cout << folly::toPrettyJson(fs.metadata_as_dynamic()) << std::endl;
} else { } else {
filesystem_v2::identify(lgr, mm, std::cout, detail, num_workers, filesystem_v2::identify(lgr, mm, std::cout, detail, num_workers,
check_integrity); check_integrity, fsopts.image_offset);
} }
} catch (system_error const& e) { } catch (system_error const& e) {
LOG_ERROR << folly::exceptionStr(e); LOG_ERROR << folly::exceptionStr(e);

View File

@ -42,7 +42,8 @@ using namespace dwarfs;
namespace { namespace {
int dwarfsextract(int argc, char** argv) { int dwarfsextract(int argc, char** argv) {
std::string filesystem, output, format, cache_size_str, log_level; std::string filesystem, output, format, cache_size_str, log_level,
image_offset;
size_t num_workers; size_t num_workers;
// clang-format off // clang-format off
@ -54,6 +55,9 @@ int dwarfsextract(int argc, char** argv) {
("output,o", ("output,o",
po::value<std::string>(&output), po::value<std::string>(&output),
"output file or directory") "output file or directory")
("image-offset,O",
po::value<std::string>(&image_offset)->default_value("auto"),
"filesystem image offset in bytes")
("format,f", ("format,f",
po::value<std::string>(&format), po::value<std::string>(&format),
"output format") "output format")
@ -89,6 +93,13 @@ int dwarfsextract(int argc, char** argv) {
try { try {
stream_logger lgr(std::cerr, logger::parse_level(log_level)); stream_logger lgr(std::cerr, logger::parse_level(log_level));
filesystem_options fsopts; filesystem_options fsopts;
try {
fsopts.image_offset = image_offset == "auto"
? filesystem_options::IMAGE_OFFSET_AUTO
: folly::to<off_t>(image_offset);
} catch (...) {
DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
}
fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str); fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
fsopts.block_cache.num_workers = num_workers; fsopts.block_cache.num_workers = num_workers;