diff --git a/include/dwarfs/filesystem_v2.h b/include/dwarfs/filesystem_v2.h
index ab0d14c1..771a8539 100644
--- a/include/dwarfs/filesystem_v2.h
+++ b/include/dwarfs/filesystem_v2.h
@@ -65,7 +65,7 @@ class filesystem_v2 {
 
   static int identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
                       int detail_level = 0, size_t num_readers = 1,
-                      bool check_integrity = false);
+                      bool check_integrity = false, off_t image_offset = 0);
 
   void dump(std::ostream& os, int detail_level) const {
     impl_->dump(os, detail_level);
diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h
index 0e9d158c..519ea7df 100644
--- a/include/dwarfs/options.h
+++ b/include/dwarfs/options.h
@@ -25,6 +25,8 @@
 #include <cstddef>
 #include <iosfwd>
 
+#include <sys/types.h>
+
 namespace dwarfs {
 
 enum class mlock_mode { NONE, TRY, MUST };
@@ -42,7 +44,11 @@ struct metadata_options {
 };
 
 struct filesystem_options {
+  // sentinel for image_offset: scan the image for the filesystem magic
+  static constexpr off_t IMAGE_OFFSET_AUTO{-1};
+
   mlock_mode lock_mode{mlock_mode::NONE};
+  off_t image_offset{0};
   block_cache_options block_cache;
   metadata_options metadata;
 };
diff --git a/src/dwarfs.cpp b/src/dwarfs.cpp
index 808a87e3..a6314647 100644
--- a/src/dwarfs.cpp
+++ b/src/dwarfs.cpp
@@ -59,6 +59,7 @@ struct options {
   const char* workers_str;          // TODO: const?? -> use string?
   const char* mlock_str;            // TODO: const?? -> use string?
   const char* decompress_ratio_str; // TODO: const?? -> use string?
+  const char* image_offset_str;     // TODO: const?? -> use string?
   int enable_nlink;
   int readonly;
   int cache_image;
@@ -82,6 +83,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
     DWARFS_OPT("workers=%s", workers_str, 0),
     DWARFS_OPT("mlock=%s", mlock_str, 0),
     DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
+    DWARFS_OPT("offset=%s", image_offset_str, 0),
     DWARFS_OPT("enable_nlink", enable_nlink, 1),
     DWARFS_OPT("readonly", readonly, 1),
     DWARFS_OPT("cache_image", cache_image, 1),
@@ -112,6 +114,19 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
   fsopts.block_cache.mm_release = !s_opts.cache_image;
   fsopts.metadata.enable_nlink = bool(s_opts.enable_nlink);
   fsopts.metadata.readonly = bool(s_opts.readonly);
+
+  if (s_opts.image_offset_str) {
+    std::string image_offset{s_opts.image_offset_str};
+
+    try {
+      fsopts.image_offset = image_offset == "auto"
+                                ? filesystem_options::IMAGE_OFFSET_AUTO
+                                : folly::to<off_t>(image_offset);
+    } catch (...) {
+      DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
+    }
+  }
+
   s_fs = std::make_shared<filesystem_v2>(
       s_lgr, std::make_shared<mmap>(s_opts.fsimage), fsopts, FUSE_ROOT_ID);
 
@@ -426,20 +441,22 @@ void op_statfs(fuse_req_t req, fuse_ino_t /*ino*/) {
 }
 
 void usage(const char* progname) {
-  std::cerr << "dwarfs (" << PRJ_GIT_ID << ", fuse version " << FUSE_USE_VERSION
-            << ")\n\n"
-            << "usage: " << progname << " image mountpoint [options]\n\n"
-            << "DWARFS options:\n"
-            << " -o cachesize=SIZE set size of block cache (512M)\n"
-            << " -o workers=NUM number of worker threads (2)\n"
-            << " -o mlock=NAME mlock mode: (none), try, must\n"
-            << " -o decratio=NUM ratio for full decompression (0.8)\n"
-            << " -o enable_nlink show correct hardlink numbers\n"
-            << " -o readonly show read-only file system\n"
-            << " -o (no_)cache_image (don't) keep image in kernel cache\n"
-            << " -o (no_)cache_files (don't) keep files in kernel cache\n"
-            << " -o debuglevel=NAME error, warn, (info), debug, trace\n"
-            << std::endl;
+  std::cerr
+      << "dwarfs (" << PRJ_GIT_ID << ", fuse version " << FUSE_USE_VERSION
+      << ")\n\n"
+      << "usage: " << progname << " image mountpoint [options]\n\n"
+      << "DWARFS options:\n"
+      << " -o cachesize=SIZE set size of block cache (512M)\n"
+      << " -o workers=NUM number of worker threads (2)\n"
+      << " -o mlock=NAME mlock mode: (none), try, must\n"
+      << " -o decratio=NUM ratio for full decompression (0.8)\n"
+      << " -o offset=NUM|auto filesystem image offset in bytes (0)\n"
+      << " -o enable_nlink show correct hardlink numbers\n"
+      << " -o readonly show read-only file system\n"
+      << " -o (no_)cache_image (don't) keep image in kernel cache\n"
+      << " -o (no_)cache_files (don't) keep files in kernel cache\n"
+      << " -o debuglevel=NAME error, warn, (info), debug, trace\n"
+      << std::endl;
 
 #if FUSE_USE_VERSION >= 30
   fuse_cmdline_help();
diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp
index b2b0fea8..6fb07908 100644
--- a/src/dwarfs/filesystem_v2.cpp
+++ b/src/dwarfs/filesystem_v2.cpp
@@ -55,13 +55,84 @@ namespace {
 
 class filesystem_parser {
  public:
-  explicit filesystem_parser(std::shared_ptr<mmif> mm)
-      : mm_(mm) {
-    if (mm_->size() < sizeof(file_header)) {
+  // Resolves the offset of the filesystem image inside the mapping `mm`.
+  //
+  // An explicit `image_offset` is returned unchanged; negative values other
+  // than IMAGE_OFFSET_AUTO are rejected. For IMAGE_OFFSET_AUTO the mapping is
+  // scanned for the "DWARFS" + MAJOR_VERSION magic. A hit is accepted if the
+  // header's minor version is < 2, or if it is section number 0 and is
+  // directly followed by another magic-prefixed header with section number 1.
+  // Throws runtime_error if no candidate passes these checks.
+  static off_t find_image_offset(mmif& mm, off_t image_offset) {
+    if (image_offset != filesystem_options::IMAGE_OFFSET_AUTO) {
+      if (image_offset < 0) {
+        DWARFS_THROW(runtime_error, "invalid image offset");
+      }
+      return image_offset;
+    }
+
+    static constexpr std::array<char, 7> magic{
+        {'D', 'W', 'A', 'R', 'F', 'S', MAJOR_VERSION}};
+
+    off_t start = 0;
+    for (;;) {
+      // search only the bytes that remain after `start`
+      auto ps = mm.as<void>(start);
+      auto pc = ::memmem(ps, mm.size() - start, magic.data(), magic.size());
+
+      if (!pc) {
+        break;
+      }
+
+      // memmem() reports a position relative to `ps`; make it absolute
+      off_t pos = start + (static_cast<uint8_t const*>(pc) -
+                           static_cast<uint8_t const*>(ps));
+
+      if (pos + sizeof(file_header) >= mm.size()) {
+        break;
+      }
+
+      auto fh = mm.as<file_header>(pos);
+
+      if (fh->minor < 2) {
+        // best we can do for older file systems
+        return pos;
+      }
+
+      // do a little more validation before we return
+      if (pos + sizeof(section_header_v2) >= mm.size()) {
+        break;
+      }
+
+      auto sh = mm.as<section_header_v2>(pos);
+
+      if (sh->number == 0) {
+        if (pos + 2 * sizeof(section_header_v2) + sh->length >= mm.size()) {
+          break;
+        }
+
+        ps = mm.as<void>(pos + sizeof(section_header_v2) + sh->length);
+
+        if (::memcmp(ps, magic.data(), magic.size()) == 0 and
+            reinterpret_cast<section_header_v2 const*>(ps)->number == 1) {
+          return pos;
+        }
+      }
+
+      start = pos + magic.size();
+    }
+
+    DWARFS_THROW(runtime_error, "no filesystem found");
+  }
+
+  explicit filesystem_parser(std::shared_ptr<mmif> mm, off_t image_offset = 0)
+      : mm_{mm}
+      , image_offset_{find_image_offset(*mm_, image_offset)} {
+    if (mm_->size() < image_offset_ + sizeof(file_header)) {
       DWARFS_THROW(runtime_error, "file too small");
     }
 
-    auto fh = mm_->as<file_header>();
+    auto fh = mm_->as<file_header>(image_offset_);
 
     if (::memcmp(&fh->magic[0], "DWARFS", 6) != 0) {
       DWARFS_THROW(runtime_error, "magic not found");
@@ -83,7 +154,7 @@ class filesystem_parser {
   }
 
   std::optional<fs_section> next_section() {
-    if (offset_ < mm_->size()) {
+    if (offset_ < static_cast<off_t>(mm_->size())) {
       auto section = fs_section(*mm_, offset_, version_);
       offset_ = section.end();
       return section;
@@ -92,15 +163,20 @@ class filesystem_parser {
     return std::nullopt;
   }
 
-  void rewind() { offset_ = version_ == 1 ? sizeof(file_header) : 0; }
+  void rewind() {
+    offset_ = image_offset_ + (version_ == 1 ? sizeof(file_header) : 0);
+  }
 
   std::string version() const {
     return fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
   }
 
+  off_t image_offset() const { return image_offset_; }
+
  private:
   std::shared_ptr<mmif> mm_;
-  size_t offset_{0};
+  off_t const image_offset_;
+  off_t offset_{0};
   int version_{0};
   uint8_t major_{0};
   uint8_t minor_{0};
@@ -218,7 +294,7 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
                                         int inode_offset)
     : LOG_PROXY_INIT(lgr)
     , mm_(std::move(mm)) {
-  filesystem_parser parser(mm_);
+  filesystem_parser parser(mm_, options.image_offset);
   block_cache cache(lgr, mm_, options.block_cache);
 
   section_map sections;
@@ -475,13 +551,18 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
 
 int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
                             std::ostream& os, int detail_level,
-                            size_t num_readers, bool check_integrity) {
+                            size_t num_readers, bool check_integrity,
+                            off_t image_offset) {
   // TODO:
   LOG_PROXY(debug_logger_policy, lgr);
-  filesystem_parser parser(mm);
+  filesystem_parser parser(mm, image_offset);
 
   if (detail_level > 0) {
-    os << "FILESYSTEM version " << parser.version() << std::endl;
+    os << "DwarFS version " << parser.version();
+    if (auto off = parser.image_offset(); off > 0) {
+      os << " at offset " << off;
+    }
+    os << std::endl;
   }
 
   worker_group wg("reader", num_readers);
@@ -540,6 +621,7 @@ int filesystem_v2::identify(logger& lgr, std::shared_ptr<mmif> mm,
     if (detail_level > 0) {
       fsopts.metadata.enable_nlink = true;
     }
+    fsopts.image_offset = image_offset;
     filesystem_v2(lgr, mm, fsopts).dump(os, detail_level);
   }
 
diff --git a/src/dwarfsck.cpp b/src/dwarfsck.cpp
index 08623335..0b4e5901 100644
--- a/src/dwarfsck.cpp
+++ b/src/dwarfsck.cpp
@@ -43,7 +43,7 @@ namespace po = boost::program_options;
 int dwarfsck(int argc, char** argv) {
   const size_t num_cpu = std::max(std::thread::hardware_concurrency(), 1u);
 
-  std::string log_level, input, export_metadata;
+  std::string log_level, input, export_metadata, image_offset;
   size_t num_workers;
   int detail;
   bool json = false;
@@ -58,6 +58,9 @@ int dwarfsck(int argc, char** argv) {
     ("detail,d",
         po::value<int>(&detail)->default_value(2),
         "detail level")
+    ("image-offset,O",
+        po::value<std::string>(&image_offset)->default_value("auto"),
+        "filesystem image offset in bytes")
     ("num-workers,n",
         po::value<size_t>(&num_workers)->default_value(num_cpu),
         "number of reader worker threads")
@@ -101,22 +104,31 @@ int dwarfsck(int argc, char** argv) {
   LOG_PROXY(debug_logger_policy, lgr);
 
   try {
+    filesystem_options fsopts;
+    try {
+      fsopts.image_offset = image_offset == "auto"
+                                ? filesystem_options::IMAGE_OFFSET_AUTO
+                                : folly::to<off_t>(image_offset);
+    } catch (...) {
+      DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
+    }
+
     auto mm = std::make_shared<mmap>(input);
 
     if (!export_metadata.empty()) {
       auto of = folly::File(export_metadata, O_RDWR | O_CREAT | O_TRUNC);
-      filesystem_v2 fs(lgr, mm);
+      filesystem_v2 fs(lgr, mm, fsopts);
       auto json = fs.serialize_metadata_as_json(true);
       if (folly::writeFull(of.fd(), json.data(), json.size()) < 0) {
         LOG_ERROR << "failed to export metadata";
       }
       of.close();
     } else if (json) {
-      filesystem_v2 fs(lgr, mm);
+      filesystem_v2 fs(lgr, mm, fsopts);
       std::cout << folly::toPrettyJson(fs.metadata_as_dynamic()) << std::endl;
     } else {
       filesystem_v2::identify(lgr, mm, std::cout, detail, num_workers,
-                              check_integrity);
+                              check_integrity, fsopts.image_offset);
     }
   } catch (system_error const& e) {
     LOG_ERROR << folly::exceptionStr(e);
diff --git a/src/dwarfsextract.cpp b/src/dwarfsextract.cpp
index c7c3bdb2..8b0af5ec 100644
--- a/src/dwarfsextract.cpp
+++ b/src/dwarfsextract.cpp
@@ -42,7 +42,8 @@ using namespace dwarfs;
 namespace {
 
 int dwarfsextract(int argc, char** argv) {
-  std::string filesystem, output, format, cache_size_str, log_level;
+  std::string filesystem, output, format, cache_size_str, log_level,
+      image_offset;
   size_t num_workers;
 
   // clang-format off
@@ -54,6 +55,9 @@ int dwarfsextract(int argc, char** argv) {
     ("output,o",
         po::value<std::string>(&output),
         "output file or directory")
+    ("image-offset,O",
+        po::value<std::string>(&image_offset)->default_value("auto"),
+        "filesystem image offset in bytes")
     ("format,f",
         po::value<std::string>(&format),
         "output format")
@@ -89,6 +93,13 @@ int dwarfsextract(int argc, char** argv) {
   try {
     stream_logger lgr(std::cerr, logger::parse_level(log_level));
     filesystem_options fsopts;
+    try {
+      fsopts.image_offset = image_offset == "auto"
+                                ? filesystem_options::IMAGE_OFFSET_AUTO
+                                : folly::to<off_t>(image_offset);
+    } catch (...) {
+      DWARFS_THROW(runtime_error, "failed to parse offset: " + image_offset);
+    }
     fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
     fsopts.block_cache.num_workers = num_workers;
 