metadata_v2: nuke v1 filesystem, metadata & inode_reader

This commit is contained in:
Marcus Holland-Moritz 2020-11-28 01:09:43 +01:00
parent ac5847357c
commit 18eb2e83c5
13 changed files with 13 additions and 1734 deletions

View File

@ -97,15 +97,12 @@ list(
src/dwarfs/block_manager.cpp
src/dwarfs/console_writer.cpp
src/dwarfs/entry.cpp
src/dwarfs/filesystem.cpp
src/dwarfs/filesystem_v2.cpp
src/dwarfs/filesystem_writer.cpp
src/dwarfs/fstypes.cpp
src/dwarfs/inode_manager.cpp
src/dwarfs/inode_reader.cpp
src/dwarfs/inode_reader_v2.cpp
src/dwarfs/logger.cpp
src/dwarfs/metadata.cpp
src/dwarfs/metadata_types.cpp
src/dwarfs/metadata_v2.cpp
src/dwarfs/mmap.cpp

View File

@ -1,144 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <exception>
#include <functional>
#include <memory>
#include <ostream>
#include <string>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#include "dwarfs/error.h"
#include "dwarfs/logger.h"
#include "dwarfs/mmif.h"
namespace dwarfs {
struct iovec_read_buf;
struct block_cache_options;
struct dir_entry;
struct directory;
class filesystem_writer;
class progress;
/**
 * Handle for reading a dwarfs filesystem image (v1 metadata format).
 *
 * The class is a thin facade: every non-static member function simply
 * forwards to the polymorphic `impl` object owned through `impl_`.
 */
class filesystem {
 public:
  /**
   * Open the filesystem image provided by `mm`.
   *
   * \param lgr            logger handed to the implementation
   * \param mm             access to the image data
   * \param bc_options     options for the block cache
   * \param stat_defaults  optional stat defaults, may be null
   * \param inode_offset   inode offset handed to the metadata, 0 by default
   */
  filesystem(logger& lgr, std::shared_ptr<mmif> mm,
             const block_cache_options& bc_options,
             const struct ::stat* stat_defaults = nullptr,
             int inode_offset = 0);

  /** Read the image in `mm` and feed its sections to `writer`. */
  static void rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
                      filesystem_writer& writer);

  /** Write a per-section summary of the image in `mm` to `os`. */
  static void identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os);

  void dump(std::ostream& os) const { impl_->dump(os); }

  // `walk` is const like every other accessor of this class; the
  // underlying impl::walk() is a const member function as well, so the
  // handle can be walked through a const reference.
  void walk(std::function<void(const dir_entry*)> const& func) const {
    impl_->walk(func);
  }

  // Lookup by path, by inode, or by name relative to an inode.
  const dir_entry* find(const char* path) const { return impl_->find(path); }

  const dir_entry* find(int inode) const { return impl_->find(inode); }

  const dir_entry* find(int inode, const char* name) const {
    return impl_->find(inode, name);
  }

  int getattr(const dir_entry* de, struct ::stat* stbuf) const {
    return impl_->getattr(de, stbuf);
  }

  int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const {
    return impl_->access(de, mode, uid, gid);
  }

  const directory* opendir(const dir_entry* de) const {
    return impl_->opendir(de);
  }

  const dir_entry*
  readdir(const directory* d, size_t offset, std::string* name) const {
    return impl_->readdir(d, offset, name);
  }

  size_t dirsize(const directory* d) const { return impl_->dirsize(d); }

  int readlink(const dir_entry* de, char* buf, size_t size) const {
    return impl_->readlink(de, buf, size);
  }

  int readlink(const dir_entry* de, std::string* buf) const {
    return impl_->readlink(de, buf);
  }

  int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); }

  int open(const dir_entry* de) const { return impl_->open(de); }

  ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset) const {
    return impl_->read(inode, buf, size, offset);
  }

  ssize_t
  readv(uint32_t inode, iovec_read_buf& buf, size_t size, off_t offset) const {
    return impl_->readv(inode, buf, size, offset);
  }

  /**
   * Interface implemented by the concrete filesystem; it mirrors the
   * forwarding member functions of the enclosing class one-to-one.
   */
  class impl {
   public:
    virtual ~impl() = default;

    virtual void dump(std::ostream& os) const = 0;
    virtual void
    walk(std::function<void(const dir_entry*)> const& func) const = 0;
    virtual const dir_entry* find(const char* path) const = 0;
    virtual const dir_entry* find(int inode) const = 0;
    virtual const dir_entry* find(int inode, const char* name) const = 0;
    virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0;
    virtual int
    access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0;
    virtual const directory* opendir(const dir_entry* de) const = 0;
    virtual const dir_entry*
    readdir(const directory* d, size_t offset, std::string* name) const = 0;
    virtual size_t dirsize(const directory* d) const = 0;
    virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0;
    virtual int readlink(const dir_entry* de, std::string* buf) const = 0;
    virtual int statvfs(struct ::statvfs* stbuf) const = 0;
    virtual int open(const dir_entry* de) const = 0;
    virtual ssize_t
    read(uint32_t inode, char* buf, size_t size, off_t offset) const = 0;
    virtual ssize_t readv(uint32_t inode, iovec_read_buf& buf, size_t size,
                          off_t offset) const = 0;
  };

 private:
  std::unique_ptr<impl> impl_;
};
} // namespace dwarfs

View File

@ -103,7 +103,7 @@ class filesystem_v2 {
int open(entry_view entry) const { return impl_->open(entry); }
ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset) const {
ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset = 0) const {
return impl_->read(inode, buf, size, offset);
}

View File

@ -68,10 +68,6 @@ class filesystem_writer {
impl_->write_block(std::move(data));
}
void write_metadata(std::vector<uint8_t>&& data) {
impl_->write_metadata(std::move(data));
}
void write_metadata_v2_schema(std::vector<uint8_t>&& data) {
impl_->write_metadata_v2_schema(std::move(data));
}
@ -89,7 +85,6 @@ class filesystem_writer {
virtual ~impl() = default;
virtual void write_block(std::vector<uint8_t>&& data) = 0;
virtual void write_metadata(std::vector<uint8_t>&& data) = 0;
virtual void write_metadata_v2_schema(std::vector<uint8_t>&& data) = 0;
virtual void write_metadata_v2(std::vector<uint8_t>&& data) = 0;
virtual void flush() = 0;

View File

@ -61,37 +61,6 @@ struct iovec_read_buf {
folly::small_vector<block_range, inline_storage> ranges;
};
/*************************
---------------------
file_header
---------------------
section_header [BLOCK]
block 0
---------------------
section_header [BLOCK]
block n
---------------------
section_header [METADATA]
metadata
---------------------
TODO: better description ;-)
metadata:
links_table -> vector<uint8_t> // links first, potential re-use for names
table :-)
names_table -> vector<uint8_t>
inode_table -> vector<chunk> // sizeof(chunk) aligned (64-bit)
directories...
inode_index: inode -> dir_entry offset
chunk_index: (inode - file_inode_offset) -> chunk offset
*************************/
constexpr uint8_t MAJOR_VERSION = 1;
constexpr uint8_t MINOR_VERSION = 0;
@ -99,46 +68,6 @@ enum class section_type : uint16_t {
BLOCK = 0,
// Optionally compressed block data.
METADATA = 1,
// Optionally compressed metadata. This is just
// another section list.
META_TABLEDATA = 2,
// This is raw data that is indexed from the other
// sections by offset. It contains all names, link
// targets and chunk lists.
// Names are referenced by offset/length. Link targets
// are referenced by offset and actually start with a
// uint16_t storing the length of the remaining string.
// Names are free to share data with link targets.
// Chunk lists are just a vector of chunks, aligned to
// the size of a chunk for efficient access.
META_INODE_INDEX = 3,
// The inode index is a vector of offsets to all inodes
// (i.e. dir_entry* structs). The vector may be offset
// by inode_index_offset if inodes do not start at zero.
META_CHUNK_INDEX = 4,
// The chunk index is a vector of offsets to the start
// of the chunk list for file inodes. As all link and
// directory inodes precede all file inodes, this vector
// is offset by chunk_index_offset. There is one more
// element in the chunk index vector that holds an offset
// to the end of the chunk lists.
META_DIRECTORIES = 5,
// All directory structures, in top-down order. These
// are referenced from within the inode index. The root
// directory also has its dir_entry* struct stored here.
META_CONFIG = 6,
// Configuration data for this filesystem. Defines the
// type of dir_entry* structure being used as well as
// the block size which is needed for working with the
// chunk lists. Also defines inode offsets being used
// and the total inode count (for out-of-bounds checks).
METADATA_V2_SCHEMA = 7,
// Frozen metadata schema.

View File

@ -1,71 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <memory>
#include "dwarfs/block_cache.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
namespace dwarfs {
/**
 * Reads file data described by a list of chunks.
 *
 * Thin handle around a polymorphic `impl`. A default-constructed reader
 * holds no implementation; it has to be move-assigned from a fully
 * constructed reader before any of the forwarding functions is called.
 */
class inode_reader {
 public:
  inode_reader() = default;

  inode_reader(logger& lgr, block_cache&& bc, unsigned block_size_bits);

  inode_reader& operator=(inode_reader&&) = default;

  /** Forwards to impl::read() with identical arguments. */
  ssize_t read(char* buf, size_t size, off_t offset, const chunk_type* chunk,
               size_t chunk_count) const {
    return impl_->read(buf, size, offset, chunk, chunk_count);
  }

  /** Forwards to impl::readv() with identical arguments. */
  ssize_t readv(iovec_read_buf& buf, size_t size, off_t offset,
                const chunk_type* chunk, size_t chunk_count) const {
    return impl_->readv(buf, size, offset, chunk, chunk_count);
  }

  /** Forwards to impl::dump() with identical arguments. */
  void dump(std::ostream& os, const std::string& indent,
            const chunk_type* chunk, size_t chunk_count) const {
    impl_->dump(os, indent, chunk, chunk_count);
  }

  /** Interface every concrete reader has to provide. */
  class impl {
   public:
    virtual ~impl() = default;

    virtual ssize_t read(char* buf, size_t size, off_t offset,
                         const chunk_type* chunk, size_t chunk_count) const = 0;

    virtual ssize_t
    readv(iovec_read_buf& buf, size_t size, off_t offset,
          const chunk_type* chunk, size_t chunk_count) const = 0;

    virtual void dump(std::ostream& os, const std::string& indent,
                      const chunk_type* chunk, size_t chunk_count) const = 0;
  };

 private:
  std::unique_ptr<impl> impl_;
};
} // namespace dwarfs

View File

@ -1,143 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <vector>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
namespace dwarfs {
/**
 * Accessor for the (v1) filesystem metadata.
 *
 * Thin handle around a polymorphic `impl`. A default-constructed object
 * holds no implementation; of all member functions only empty() checks
 * for that case, every other one dereferences `impl_` unconditionally.
 */
class metadata {
 public:
  metadata() = default;

  metadata(logger& lgr, std::vector<uint8_t>&& data,
           const struct ::stat* defaults, int inode_offset = 0);

  metadata& operator=(metadata&&) = default;

  static void get_stat_defaults(struct ::stat* defaults);

  size_t size() const { return impl_->size(); }

  /** True if there is no implementation or the implementation is empty. */
  bool empty() const { return !impl_ || impl_->empty(); }

  size_t block_size() const { return impl_->block_size(); }

  unsigned block_size_bits() const { return impl_->block_size_bits(); }

  void
  dump(std::ostream& os,
       std::function<void(const std::string&, uint32_t)> const& icb) const {
    impl_->dump(os, icb);
  }

  void walk(std::function<void(const dir_entry*)> const& func) const {
    impl_->walk(func);
  }

  // Lookup by path, by inode, or by name relative to an inode.
  const dir_entry* find(const char* path) const { return impl_->find(path); }

  const dir_entry* find(int inode) const { return impl_->find(inode); }

  const dir_entry* find(int inode, const char* name) const {
    return impl_->find(inode, name);
  }

  int getattr(const dir_entry* de, struct ::stat* stbuf) const {
    return impl_->getattr(de, stbuf);
  }

  int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const {
    return impl_->access(de, mode, uid, gid);
  }

  const directory* opendir(const dir_entry* de) const {
    return impl_->opendir(de);
  }

  const dir_entry*
  readdir(const directory* d, size_t offset, std::string* name) const {
    return impl_->readdir(d, offset, name);
  }

  size_t dirsize(const directory* d) const { return impl_->dirsize(d); }

  int readlink(const dir_entry* de, char* buf, size_t size) const {
    return impl_->readlink(de, buf, size);
  }

  int readlink(const dir_entry* de, std::string* buf) const {
    return impl_->readlink(de, buf);
  }

  int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); }

  int open(const dir_entry* de) const { return impl_->open(de); }

  /** Returns the chunk list of `inode`; the count is stored in `num`. */
  const chunk_type* get_chunks(int inode, size_t& num) const {
    return impl_->get_chunks(inode, num);
  }

  /** Interface mirrored one-to-one by the forwarding functions above. */
  class impl {
   public:
    virtual ~impl() = default;

    virtual size_t size() const = 0;
    virtual bool empty() const = 0;
    virtual size_t block_size() const = 0;
    virtual unsigned block_size_bits() const = 0;
    virtual void dump(
        std::ostream& os,
        std::function<void(const std::string&, uint32_t)> const& icb) const = 0;
    virtual void
    walk(std::function<void(const dir_entry*)> const& func) const = 0;
    virtual const dir_entry* find(const char* path) const = 0;
    virtual const dir_entry* find(int inode) const = 0;
    virtual const dir_entry* find(int inode, const char* name) const = 0;
    virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0;
    virtual int
    access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0;
    virtual const directory* opendir(const dir_entry* de) const = 0;
    virtual const dir_entry*
    readdir(const directory* d, size_t offset, std::string* name) const = 0;
    virtual size_t dirsize(const directory* d) const = 0;
    virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0;
    virtual int readlink(const dir_entry* de, std::string* buf) const = 0;
    virtual int statvfs(struct ::statvfs* stbuf) const = 0;
    virtual int open(const dir_entry* de) const = 0;
    virtual const chunk_type* get_chunks(int inode, size_t& num) const = 0;
  };

 private:
  std::unique_ptr<impl> impl_;
};
} // namespace dwarfs

View File

@ -1,377 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <cstddef>
#include <cstring>
#include <folly/Format.h>
#include <folly/container/Enumerate.h>
#include "dwarfs/block_cache.h"
#include "dwarfs/config.h"
#include "dwarfs/filesystem.h"
#include "dwarfs/filesystem_writer.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/inode_reader.h"
#include "dwarfs/metadata.h"
#include "dwarfs/progress.h"
namespace dwarfs {
namespace {
/**
 * Sequential reader for the section headers of a filesystem image.
 *
 * The constructor validates the file header; next_section() then yields
 * one section per call until the image is exhausted.
 *
 * Invariant: `offset_ <= mm_->size()` holds at all times, which is what
 * makes the subtractions in next_section() safe.
 */
class filesystem_parser {
 public:
  /**
   * Validate the file header of the image in `mm`.
   *
   * \throws std::runtime_error if the image is smaller than a file header,
   *         the magic is neither "DWARFS" nor "NANOFS", the major version
   *         differs, or the minor version is newer than supported.
   */
  explicit filesystem_parser(std::shared_ptr<mmif> mm)
      : mm_(std::move(mm))
      , offset_(sizeof(file_header)) {
    if (mm_->size() < sizeof(file_header)) {
      throw std::runtime_error("file too small");
    }

    const file_header* fh = mm_->as<file_header>();

    if (::memcmp(&fh->magic[0], "DWARFS", 6) != 0 &&
        ::memcmp(&fh->magic[0], "NANOFS", 6) != 0) { // keep for compatibility
      throw std::runtime_error("magic not found");
    }

    if (fh->major != MAJOR_VERSION) {
      throw std::runtime_error("different major version");
    }

    if (fh->minor > MINOR_VERSION) {
      throw std::runtime_error("newer minor version");
    }
  }

  /**
   * Read the next section header.
   *
   * \param sh     receives a copy of the section header
   * \param start  receives the offset of the section payload in the image
   * \param lgr    used to trace the header that was read
   * \return true if a section was read, false if fewer bytes than one
   *         section header remain
   * \throws std::runtime_error if the payload length recorded in the
   *         header extends past the end of the image
   */
  template <typename Logger>
  bool next_section(section_header& sh, size_t& start, Logger& lgr) {
    // Compare against the number of bytes left rather than adding to
    // offset_: the sum could wrap around for bogus on-disk values.
    if (mm_->size() - offset_ < sizeof(section_header)) {
      return false;
    }

    ::memcpy(&sh, mm_->as<char>(offset_), sizeof(section_header));

    lgr.trace() << "section_header@" << offset_ << " (" << sh.to_string()
                << ")";

    offset_ += sizeof(section_header);

    // sh.length comes straight from the file; `offset_ + sh.length` may
    // overflow and slip past a naive bounds check, so test the length
    // against the remaining size instead.
    if (sh.length > mm_->size() - offset_) {
      throw std::runtime_error("truncated file");
    }

    start = offset_;
    offset_ += sh.length;

    return true;
  }

  /** Restart iteration at the first section after the file header. */
  void rewind() { offset_ = sizeof(file_header); }

 private:
  std::shared_ptr<mmif> mm_;
  size_t offset_;
};
} // namespace
// Concrete implementation of filesystem::impl, parameterized on the logger
// policy used by `log_`. Metadata queries are answered by `meta_`, file data
// is read through `ir_`.
template <typename LoggerPolicy>
class filesystem_ : public filesystem::impl {
public:
// Parses the image in `mm`; throws std::runtime_error on an unknown section
// type or when no metadata section is present (see the definition).
filesystem_(logger& lgr_, std::shared_ptr<mmif> mm,
const block_cache_options& bc_options,
const struct ::stat* stat_defaults, int inode_offset);
void dump(std::ostream& os) const override;
void walk(std::function<void(const dir_entry*)> const& func) const override;
const dir_entry* find(const char* path) const override;
const dir_entry* find(int inode) const override;
const dir_entry* find(int inode, const char* name) const override;
int getattr(const dir_entry* de, struct ::stat* stbuf) const override;
int access(const dir_entry* de, int mode, uid_t uid,
gid_t gid) const override;
const directory* opendir(const dir_entry* de) const override;
const dir_entry*
readdir(const directory* d, size_t offset, std::string* name) const override;
size_t dirsize(const directory* d) const override;
int readlink(const dir_entry* de, char* buf, size_t size) const override;
int readlink(const dir_entry* de, std::string* buf) const override;
int statvfs(struct ::statvfs* stbuf) const override;
int open(const dir_entry* de) const override;
ssize_t
read(uint32_t inode, char* buf, size_t size, off_t offset) const override;
ssize_t readv(uint32_t inode, iovec_read_buf& buf, size_t size,
off_t offset) const override;
private:
log_proxy<LoggerPolicy> log_;
// Keeps the image alive; the constructor hands pointers into it to the
// block cache.
std::shared_ptr<mmif> mm_;
metadata meta_;
inode_reader ir_;
};
/**
 * Scan all sections of the image: BLOCK sections are registered with the
 * block cache, the METADATA section is decompressed and parsed, and v2
 * metadata sections are ignored.
 *
 * \throws std::runtime_error for an unknown section type or if no
 *         metadata was found.
 */
template <typename LoggerPolicy>
filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
                                       const block_cache_options& bc_options,
                                       const struct ::stat* stat_defaults,
                                       int inode_offset)
    : log_(lgr)
    , mm_(mm) {
  filesystem_parser sections(mm_);
  block_cache blocks(lgr, bc_options);

  section_header header;
  size_t payload_pos = 0;

  while (sections.next_section(header, payload_pos, log_)) {
    if (header.type == section_type::BLOCK) {
      blocks.insert(header.compression, mm_->as<uint8_t>(payload_pos),
                    static_cast<size_t>(header.length));
    } else if (header.type == section_type::METADATA) {
      meta_ = metadata(
          lgr,
          block_decompressor::decompress(
              header.compression, mm_->as<uint8_t>(payload_pos), header.length),
          stat_defaults, inode_offset);
    } else if (header.type != section_type::METADATA_V2_SCHEMA &&
               header.type != section_type::METADATA_V2) {
      // v2 metadata sections are silently skipped, anything else is fatal
      throw std::runtime_error("unknown section");
    }
  }

  if (meta_.empty()) {
    throw std::runtime_error("no metadata found");
  }

  log_.debug() << "read " << blocks.block_count() << " blocks and "
               << meta_.size() << " bytes of metadata";

  // the block size is only known once the metadata has been parsed
  blocks.set_block_size(meta_.block_size());

  ir_ = inode_reader(lgr, std::move(blocks), meta_.block_size_bits());
}
/**
 * Dump the metadata to `os`. For every inode reported through the metadata
 * callback, the number of chunks is printed, followed by the chunk details
 * from the inode reader.
 */
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::dump(std::ostream& os) const {
  auto const describe_chunks = [&](const std::string& indent, uint32_t inode) {
    size_t count = 0;
    const chunk_type* chunks = meta_.get_chunks(inode, count);

    os << indent << count << " chunks in inode " << inode << "\n";
    ir_.dump(os, indent + " ", chunks, count);
  };

  meta_.dump(os, describe_chunks);
}
// Forwards to metadata::walk() with `func` unchanged.
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::walk(
std::function<void(const dir_entry*)> const& func) const {
meta_.walk(func);
}
// Path lookup; returns whatever metadata::find(path) returns.
template <typename LoggerPolicy>
const dir_entry* filesystem_<LoggerPolicy>::find(const char* path) const {
return meta_.find(path);
}
// Inode lookup; returns whatever metadata::find(inode) returns.
template <typename LoggerPolicy>
const dir_entry* filesystem_<LoggerPolicy>::find(int inode) const {
return meta_.find(inode);
}
// Lookup of `name` relative to `inode`; returns whatever
// metadata::find(inode, name) returns.
template <typename LoggerPolicy>
const dir_entry*
filesystem_<LoggerPolicy>::find(int inode, const char* name) const {
return meta_.find(inode, name);
}
// Forwards to metadata::getattr(); the result is returned unchanged.
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::getattr(const dir_entry* de,
struct ::stat* stbuf) const {
return meta_.getattr(de, stbuf);
}
// Forwards to metadata::access(); the result is returned unchanged.
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::access(const dir_entry* de, int mode, uid_t uid,
gid_t gid) const {
return meta_.access(de, mode, uid, gid);
}
// Forwards to metadata::opendir(); the result is returned unchanged.
template <typename LoggerPolicy>
const directory* filesystem_<LoggerPolicy>::opendir(const dir_entry* de) const {
return meta_.opendir(de);
}
// Forwards to metadata::readdir(); the result is returned unchanged.
template <typename LoggerPolicy>
const dir_entry*
filesystem_<LoggerPolicy>::readdir(const directory* d, size_t offset,
std::string* name) const {
return meta_.readdir(d, offset, name);
}
// Forwards to metadata::dirsize(); the result is returned unchanged.
template <typename LoggerPolicy>
size_t filesystem_<LoggerPolicy>::dirsize(const directory* d) const {
return meta_.dirsize(d);
}
// Raw-buffer overload; forwards to metadata::readlink(de, buf, size).
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::readlink(const dir_entry* de, char* buf,
size_t size) const {
return meta_.readlink(de, buf, size);
}
// std::string overload; forwards to metadata::readlink(de, buf).
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::readlink(const dir_entry* de,
std::string* buf) const {
return meta_.readlink(de, buf);
}
// Forwards to metadata::statvfs(); the result is returned unchanged.
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
// TODO: not sure if that's the right abstraction...
return meta_.statvfs(stbuf);
}
// Forwards to metadata::open(); the result is returned unchanged.
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::open(const dir_entry* de) const {
return meta_.open(de);
}
/**
 * Read up to `size` bytes at `offset` from the file identified by `inode`
 * into `buf`: the chunk list is fetched from the metadata and handed to the
 * inode reader, whose result is returned unchanged.
 */
template <typename LoggerPolicy>
ssize_t filesystem_<LoggerPolicy>::read(uint32_t inode, char* buf, size_t size,
                                        off_t offset) const {
  size_t chunk_count = 0;
  const chunk_type* chunks = meta_.get_chunks(inode, chunk_count);

  return ir_.read(buf, size, offset, chunks, chunk_count);
}
/**
 * Vectored variant of read(): the chunk list of `inode` is fetched from the
 * metadata and handed to inode_reader::readv(), whose result is returned
 * unchanged.
 */
template <typename LoggerPolicy>
ssize_t filesystem_<LoggerPolicy>::readv(uint32_t inode, iovec_read_buf& buf,
                                         size_t size, off_t offset) const {
  size_t chunk_count = 0;
  const chunk_type* chunks = meta_.get_chunks(inode, chunk_count);

  return ir_.readv(buf, size, offset, chunks, chunk_count);
}
// Creates the implementation object through make_unique_logging_object,
// passing all constructor arguments on to filesystem_'s constructor.
filesystem::filesystem(logger& lgr, std::shared_ptr<mmif> mm,
const block_cache_options& bc_options,
const struct ::stat* stat_defaults, int inode_offset)
: impl_(make_unique_logging_object<filesystem::impl, filesystem_,
logger_policies>(
lgr, mm, bc_options, stat_defaults, inode_offset)) {}
/**
 * Copy the image in `mm` to `writer`, section by section.
 *
 * The image is scanned twice: the first pass locates and parses the
 * metadata (needed for the progress totals), the second pass decompresses
 * every block and hands blocks and metadata to the writer.
 *
 * \throws std::runtime_error if the image contains no metadata section or
 *         a section of unknown type.
 */
void filesystem::rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
                         filesystem_writer& writer) {
  // TODO:
  log_proxy<debug_logger_policy> log(lgr);
  filesystem_parser parser(mm);

  section_header sh;
  size_t start;
  std::vector<uint8_t> meta_raw;
  metadata meta;

  // pass 1: find the metadata, counting the sections that precede it
  while (parser.next_section(sh, start, log)) {
    if (sh.type == section_type::METADATA) {
      meta_raw = block_decompressor::decompress(
          sh.compression, mm->as<uint8_t>(start), sh.length);
      // parse a copy; the raw bytes are written out again in pass 2
      auto tmp = meta_raw;
      meta = metadata(lgr, std::move(tmp), nullptr);
      break;
    } else {
      ++prog.block_count;
    }
  }

  // Without a METADATA section `meta` is still default-constructed and has
  // no implementation; calling statvfs() on it would dereference a null
  // pointer. Fail the same way the filesystem constructor does.
  if (meta.empty()) {
    throw std::runtime_error("no metadata found");
  }

  struct ::statvfs stbuf;
  meta.statvfs(&stbuf);
  prog.original_size = stbuf.f_blocks * stbuf.f_frsize;

  parser.rewind();

  // pass 2: re-emit all sections through the writer
  while (parser.next_section(sh, start, log)) {
    // TODO: multi-thread this?
    switch (sh.type) {
    case section_type::BLOCK: {
      auto block = block_decompressor::decompress(
          sh.compression, mm->as<uint8_t>(start), sh.length);
      prog.filesystem_size += block.size();
      writer.write_block(std::move(block));
      break;
    }

    case section_type::METADATA:
      writer.write_metadata(std::move(meta_raw));
      break;

    case section_type::METADATA_V2:
      // TODO...
      break;

    default:
      throw std::runtime_error("unknown section");
    }
  }

  writer.flush();
}
/**
 * Print one summary line per section of the image in `mm` to `os`. For
 * METADATA sections the payload is additionally decompressed and parsed,
 * and block size, inode count and original filesystem size as reported by
 * statvfs() are printed.
 */
void filesystem::identify(logger& lgr, std::shared_ptr<mmif> mm,
                          std::ostream& os) {
  // TODO:
  log_proxy<debug_logger_policy> log(lgr);
  filesystem_parser sections(mm);

  section_header header;
  size_t payload_pos = 0;

  while (sections.next_section(header, payload_pos, log)) {
    std::vector<uint8_t> unpacked;
    block_decompressor decomp(header.compression,
                              mm->as<uint8_t>(payload_pos), header.length,
                              unpacked);

    float ratio = static_cast<float>(header.length) / decomp.uncompressed_size();

    os << "SECTION " << header.to_string()
       << ", blocksize=" << decomp.uncompressed_size()
       << ", ratio=" << folly::sformat("{:.2%}%", ratio)
       << std::endl;

    if (header.type != section_type::METADATA) {
      continue;
    }

    // only metadata is actually decompressed; `unpacked` receives the data
    decomp.decompress_frame(decomp.uncompressed_size());

    metadata meta(lgr, std::move(unpacked), nullptr);
    struct ::statvfs vfs;
    meta.statvfs(&vfs);

    os << "block size: " << vfs.f_bsize << std::endl;
    os << "inode count: " << vfs.f_files << std::endl;
    os << "original filesystem size: " << vfs.f_blocks << std::endl;
  }
}
} // namespace dwarfs

View File

@ -138,7 +138,6 @@ class filesystem_writer_ : public filesystem_writer::impl {
~filesystem_writer_() noexcept;
void write_block(std::vector<uint8_t>&& data) override;
void write_metadata(std::vector<uint8_t>&& data) override;
void write_metadata_v2_schema(std::vector<uint8_t>&& data) override;
void write_metadata_v2(std::vector<uint8_t>&& data) override;
void flush() override;
@ -229,8 +228,7 @@ void filesystem_writer_<LoggerPolicy>::writer_thread() {
fsb->wait_until_compressed();
log_.debug() << (fsb->type() == section_type::METADATA ? "metadata"
: "block")
log_.debug() << get_section_name(fsb->type())
<< " compressed from "
<< size_with_unit(fsb->uncompressed_size()) << " to "
<< size_with_unit(fsb->size());
@ -324,12 +322,6 @@ void filesystem_writer_<LoggerPolicy>::write_block(
write_section(section_type::BLOCK, std::move(data), bc_);
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata(
std::vector<uint8_t>&& data) {
write_section(section_type::METADATA, std::move(data), metadata_bc_);
}
template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata_v2_schema(
std::vector<uint8_t>&& data) {

View File

@ -34,12 +34,6 @@ namespace {
const std::map<section_type, std::string> sections{
#define SECTION_TYPE_(x) {section_type::x, #x}
SECTION_TYPE_(BLOCK),
SECTION_TYPE_(METADATA),
SECTION_TYPE_(META_TABLEDATA),
SECTION_TYPE_(META_INODE_INDEX),
SECTION_TYPE_(META_CHUNK_INDEX),
SECTION_TYPE_(META_DIRECTORIES),
SECTION_TYPE_(META_CONFIG),
SECTION_TYPE_(METADATA_V2_SCHEMA),
SECTION_TYPE_(METADATA_V2),
#undef SECTION_TYPE_

View File

@ -1,211 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <mutex>
#include <folly/stats/Histogram.h>
#include "dwarfs/block_cache.h"
#include "dwarfs/config.h"
#include "dwarfs/inode_reader.h"
namespace dwarfs {
/**
 * inode_reader implementation for one particular block size.
 *
 * `BlockSizeBits` selects the chunk_access specialization used to decode
 * chunks. Besides reading, the class records the iovec count of every
 * readv() call in a histogram and logs p90/p95/p99 on destruction.
 */
template <typename LoggerPolicy, unsigned BlockSizeBits>
class inode_reader_ : public inode_reader::impl {
 public:
  using access = chunk_access<BlockSizeBits>;

  inode_reader_(logger& lgr, block_cache&& bc)
      : cache_(std::move(bc))
      , log_(lgr)
      , iovec_sizes_(1, 0, 256) {}

  ~inode_reader_() {
    std::lock_guard<std::mutex> guard(iovec_sizes_mutex_);

    log_.info() << "iovec size p90: "
                << iovec_sizes_.getPercentileEstimate(0.9);
    log_.info() << "iovec size p95: "
                << iovec_sizes_.getPercentileEstimate(0.95);
    log_.info() << "iovec size p99: "
                << iovec_sizes_.getPercentileEstimate(0.99);
  }

  ssize_t read(char* buf, size_t size, off_t offset, const chunk_type* chunk,
               size_t chunk_count) const override;

  ssize_t readv(iovec_read_buf& buf, size_t size, off_t offset,
                const chunk_type* chunk, size_t chunk_count) const override;

  void dump(std::ostream& os, const std::string& indent,
            const chunk_type* chunk, size_t chunk_count) const override;

 private:
  // Shared implementation of read()/readv(); `store` receives every block
  // range together with the number of bytes delivered before it.
  template <typename StoreFunc>
  ssize_t read(size_t size, off_t offset, const chunk_type* chunk,
               size_t chunk_count, const StoreFunc& store) const;

  block_cache cache_;
  log_proxy<LoggerPolicy> log_;
  // mutable: updated from the const readv(), guarded by the mutex below
  mutable folly::Histogram<size_t> iovec_sizes_;
  mutable std::mutex iovec_sizes_mutex_;
};
/**
 * Write one line per chunk to `os`, each prefixed with `indent`, showing
 * the chunk's index, block, offset and size.
 */
template <typename LoggerPolicy, unsigned BlockSizeBits>
void inode_reader_<LoggerPolicy, BlockSizeBits>::dump(
    std::ostream& os, const std::string& indent, const chunk_type* chunk,
    size_t chunk_count) const {
  for (size_t idx = 0; idx < chunk_count; ++idx) {
    const chunk_type& current = chunk[idx];

    os << indent << "[" << idx << "] block=" << access::block(current)
       << ", offset=" << access::offset(current)
       << ", size=" << access::size(current) << "\n";
  }
}
/**
 * Core read routine shared by read() and readv().
 *
 * Locates the chunk containing `offset`, requests all block ranges needed
 * to cover up to `size` bytes from the block cache, then waits for the
 * ranges in order and passes each one to `store` together with the number
 * of bytes delivered so far.
 *
 * \return number of bytes delivered; 0 for an empty request or an offset
 *         at/after the end of the chunk list; -EINVAL for a negative
 *         offset.
 */
template <typename LoggerPolicy, unsigned BlockSizeBits>
template <typename StoreFunc>
ssize_t
inode_reader_<LoggerPolicy, BlockSizeBits>::read(size_t size, off_t offset,
                                                 const chunk_type* chunk,
                                                 size_t chunk_count,
                                                 const StoreFunc& store) const {
  if (offset < 0) {
    return -EINVAL;
  }

  if (size == 0 || chunk_count == 0) {
    return 0;
  }

  const chunk_type* const end = chunk + chunk_count;
  const chunk_type* cur = chunk;

  // skip all chunks that lie entirely before the requested offset; after
  // the loop `offset` is relative to the start of `*cur`
  for (; cur < end; ++cur) {
    size_t len = access::size(*cur);

    if (static_cast<size_t>(offset) < len) {
      break;
    }

    offset -= len;
  }

  if (cur == end) {
    // offset beyond EOF; TODO: check if this should rather be -EINVAL
    return 0;
  }

  // request ranges from block cache
  std::vector<std::future<block_range>> pending;
  size_t requested = 0;

  while (cur < end and requested < size) {
    size_t want = access::size(*cur) - offset;
    size_t from = access::offset(*cur) + offset;

    if (requested + want > size) {
      want = size - requested;
    }

    pending.emplace_back(cache_.get(access::block(*cur), from, want));

    requested += want;
    offset = 0; // only the first chunk is read from a non-zero offset
    ++cur;
  }

  // now fill the buffer
  size_t delivered = 0;

  for (auto& fut : pending) {
    auto range = fut.get();
    store(delivered, range);
    delivered += range.size();
  }

  return delivered;
}
/**
 * Read into a contiguous buffer: every block range produced by the core
 * read routine is copied to `buf` at the position given by the number of
 * bytes delivered before it.
 */
template <typename LoggerPolicy, unsigned BlockSizeBits>
ssize_t
inode_reader_<LoggerPolicy, BlockSizeBits>::read(char* buf, size_t size,
                                                 off_t offset,
                                                 const chunk_type* chunk,
                                                 size_t chunk_count) const {
  auto const copy_out = [&](size_t num_read, const block_range& br) {
    ::memcpy(buf + num_read, br.data(), br.size());
  };

  return read(size, offset, chunk, chunk_count, copy_out);
}
/**
 * Vectored read: instead of copying, one iovec entry pointing at each
 * block range is appended to `buf.buf`, and the range itself is stored in
 * `buf.ranges`. The resulting iovec count is added to the histogram.
 */
template <typename LoggerPolicy, unsigned BlockSizeBits>
ssize_t
inode_reader_<LoggerPolicy, BlockSizeBits>::readv(iovec_read_buf& buf,
                                                  size_t size, off_t offset,
                                                  const chunk_type* chunk,
                                                  size_t chunk_count) const {
  auto const append_range = [&](size_t, const block_range& br) {
    buf.buf.resize(buf.buf.size() + 1);
    auto& entry = buf.buf.back();
    entry.iov_base = const_cast<uint8_t*>(br.data());
    entry.iov_len = br.size();
    buf.ranges.emplace_back(br);
  };

  auto result = read(size, offset, chunk, chunk_count, append_range);

  {
    std::lock_guard<std::mutex> guard(iovec_sizes_mutex_);
    iovec_sizes_.addValue(buf.buf.size());
  }

  return result;
}
namespace {

// Maps the runtime `block_size_bits` value onto the matching compile-time
// inode_reader_<..., BlockSizeBits> instantiation.
//
// Starting at MAX_BLOCK_BITS_SIZE, each level either matches the requested
// value or delegates to the next smaller BlockSizeBits.
template <unsigned BlockSizeBits = MAX_BLOCK_BITS_SIZE>
struct inode_reader_factory {
  template <typename T>
  using inode_reader_type = inode_reader_<T, BlockSizeBits>;

  static std::unique_ptr<inode_reader::impl>
  create(logger& lgr, block_cache&& bc, unsigned block_size_bits) {
    if (block_size_bits != BlockSizeBits) {
      // not ours; try the next smaller block size
      return inode_reader_factory<BlockSizeBits - 1>::create(
          lgr, std::move(bc), block_size_bits);
    }

    return make_unique_logging_object<inode_reader::impl, inode_reader_type,
                                      logger_policies>(lgr, std::move(bc));
  }
};

// Recursion terminator: reached only when no supported block size matched.
template <>
struct inode_reader_factory<MIN_BLOCK_BITS_SIZE - 1> {
  static std::unique_ptr<inode_reader::impl>
  create(logger&, block_cache&&, unsigned) {
    throw std::runtime_error("unsupported block_size_bits");
  }
};

} // namespace
// Constructs the reader by asking inode_reader_factory for the implementation
// matching `block_size_bits`; the block cache is moved into it.
// The factory throws std::runtime_error if `block_size_bits` is unsupported.
inode_reader::inode_reader(logger& lgr, block_cache&& bc,
unsigned block_size_bits)
: impl_(inode_reader_factory<>::create(lgr, std::move(bc),
block_size_bits)) {}
} // namespace dwarfs

View File

@ -1,685 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <cstring>
#include <unistd.h>
#include "dwarfs/metadata.h"
namespace dwarfs {
namespace {

// Mode mask that clears all write permission bits (user, group, other); it is
// applied to every reported st_mode.
constexpr uint16_t READ_ONLY_MASK =
    static_cast<uint16_t>(~(S_IWUSR | S_IWGRP | S_IWOTH));

} // namespace
// Abstract accessor for directory entries stored in the metadata buffer.
// Concrete readers are created per on-disk entry layout (see create()).
class dir_reader {
public:
// Creates the reader matching `de_type`. `defaults` supplies attribute values
// for layouts that do not store them, `data` is the base of the metadata
// buffer and `inode_offset` is added to reported inode numbers.
// Throws std::runtime_error for an unknown `de_type`.
static std::shared_ptr<dir_reader>
create(dir_entry_type de_type, const struct ::stat& defaults,
const char* data, int inode_offset);
virtual ~dir_reader() = default;
// Looks up the entry whose name equals the first `clen` bytes of `path` in
// directory `d`; returns nullptr if there is no such entry.
virtual const dir_entry*
find(const directory* d, const char* path, size_t clen) const = 0;
// Fills `stbuf` with the attributes of `de`; `filesize` becomes st_size.
virtual void
getattr(const dir_entry* de, struct ::stat* stbuf, size_t filesize) const = 0;
// Checks `mode` (F_OK or R_OK/X_OK bits) for `uid`/`gid` against `de`;
// returns 0 if access is granted and EACCES otherwise.
virtual int
access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0;
// Returns the entry at index `offset` within `d` and, if `name` is non-null,
// stores the entry's name there. No bounds check is performed here.
virtual const dir_entry* readdir(const directory* d, size_t offset,
std::string* name = nullptr) const = 0;
};
// dir_reader implementation for one concrete on-disk entry layout.
//
// `DirEntryType` determines the stride of the entry array inside a directory
// and which attributes are stored per entry; the layout-specific accessors
// (getuid/getgid/gettimes) are provided as explicit specializations.
template <typename DirEntryType>
class dir_reader_ : public dir_reader {
 public:
  // `defaults` is copied; `data` must point to the start of the metadata
  // buffer that all name offsets are relative to.
  dir_reader_(const struct ::stat& defaults, const char* data, int inode_offset)
      : defaults_(defaults)
      , data_(data)
      , inode_offset_(inode_offset) {}

  // Binary search for the entry named by the first `clen` bytes of `path`.
  // Entries are ordered by strncmp() over the common prefix length, with the
  // shorter name first on a tie.
  const dir_entry*
  find(const directory* d, const char* path, size_t clen) const override {
    auto const first = reinterpret_cast<const DirEntryType*>(&d->u);
    auto const last = first + d->count;

    auto sorts_before = [&](const DirEntryType& entry, const char* p) {
      auto const* e = reinterpret_cast<const dir_entry*>(&entry);
      size_t const common = std::min(static_cast<size_t>(e->name_size), clen);
      int const cmp = ::strncmp(nameptr(e), p, common);
      if (cmp != 0) {
        return cmp < 0;
      }
      return e->name_size < clen;
    };

    auto const pos = std::lower_bound(first, last, path, sorts_before);

    if (pos == last) {
      return nullptr;
    }

    auto const* candidate = reinterpret_cast<const dir_entry*>(pos);
    bool const exact = candidate->name_size == clen and
                       ::strncmp(nameptr(candidate), path, clen) == 0;

    return exact ? candidate : nullptr;
  }

  // Fills the attribute fields of `stbuf`; write permission bits are always
  // masked out of the reported mode.
  void getattr(const dir_entry* de, struct ::stat* stbuf,
               size_t filesize) const override {
    stbuf->st_ino = de->inode + inode_offset_;
    stbuf->st_mode = de->mode & READ_ONLY_MASK;
    stbuf->st_uid = getuid(de);
    stbuf->st_gid = getgid(de);
    stbuf->st_size = filesize;
    // number of 512-byte units, rounded up
    stbuf->st_blocks = (stbuf->st_size + 511) / 512;
    gettimes(de, stbuf);
  }

  // Returns 0 if all bits requested in `mode` are granted, EACCES otherwise.
  // Only R_OK and X_OK can ever be granted.
  int access(const dir_entry* de, int mode, uid_t uid,
             gid_t gid) const override {
    if (mode == F_OK) {
      // only the existence of the entry was asked for
      return 0;
    }

    int granted = 0;

    auto grant = [&granted, de](uint16_t r_bit, uint16_t x_bit) {
      granted |= (de->mode & r_bit) ? R_OK : 0;
      granted |= (de->mode & x_bit) ? X_OK : 0;
    };

    // Accumulate the rights of every class the caller belongs to.
    grant(S_IROTH, S_IXOTH);

    if (getgid(de) == gid) {
      grant(S_IRGRP, S_IXGRP);
    }

    if (getuid(de) == uid) {
      grant(S_IRUSR, S_IXUSR);
    }

    if ((granted & mode) != mode) {
      return EACCES;
    }

    return 0;
  }

  // Returns the entry at index `offset` of `d`; the caller is responsible for
  // keeping `offset` below d->count.
  const dir_entry*
  readdir(const directory* d, size_t offset, std::string* name) const override {
    auto const entries = reinterpret_cast<const DirEntryType*>(&d->u);
    auto const entry = reinterpret_cast<const dir_entry*>(entries + offset);

    if (name) {
      name->assign(nameptr(entry), entry->name_size);
    }

    return entry;
  }

 private:
  // Layout-specific accessors, defined as explicit specializations.
  uid_t getuid(const dir_entry* de) const;
  gid_t getgid(const dir_entry* de) const;
  void gettimes(const dir_entry* de, struct ::stat* stbuf) const;

  // Interprets the metadata bytes at `offset` as a `T`.
  template <typename T>
  const T* as(size_t offset = 0) const {
    return reinterpret_cast<const T*>(data_ + offset);
  }

  // Start of the entry's name; the name is `name_size` bytes long.
  const char* nameptr(const dir_entry* de) const {
    return as<char>(de->name_offset);
  }

  const struct ::stat defaults_;
  const char* data_;
  const int inode_offset_;
};
// Owner/group accessors per entry layout.

// Plain entries store no ownership; report the configured defaults.
template <>
uid_t dir_reader_<dir_entry>::getuid(const dir_entry* /*de*/) const {
  return defaults_.st_uid;
}

template <>
gid_t dir_reader_<dir_entry>::getgid(const dir_entry* /*de*/) const {
  return defaults_.st_gid;
}

// Entries with owner/group stored directly in the entry.
template <>
uid_t dir_reader_<dir_entry_ug>::getuid(const dir_entry* de) const {
  return reinterpret_cast<const dir_entry_ug*>(de)->owner;
}

template <>
gid_t dir_reader_<dir_entry_ug>::getgid(const dir_entry* de) const {
  return reinterpret_cast<const dir_entry_ug*>(de)->group;
}

// Entries with timestamps keep owner/group in the embedded `ug` member.
template <>
uid_t dir_reader_<dir_entry_ug_time>::getuid(const dir_entry* de) const {
  return reinterpret_cast<const dir_entry_ug_time*>(de)->ug.owner;
}

template <>
gid_t dir_reader_<dir_entry_ug_time>::getgid(const dir_entry* de) const {
  return reinterpret_cast<const dir_entry_ug_time*>(de)->ug.group;
}
// Timestamp accessors per entry layout.

// Plain entries carry no timestamps; use the defaults.
template <>
void dir_reader_<dir_entry>::gettimes(const dir_entry* /*de*/,
                                      struct ::stat* stbuf) const {
  stbuf->st_ctime = defaults_.st_ctime;
  stbuf->st_mtime = defaults_.st_mtime;
  stbuf->st_atime = defaults_.st_atime;
}

// Owner/group entries carry no timestamps either; use the defaults.
template <>
void dir_reader_<dir_entry_ug>::gettimes(const dir_entry* /*de*/,
                                         struct ::stat* stbuf) const {
  stbuf->st_ctime = defaults_.st_ctime;
  stbuf->st_mtime = defaults_.st_mtime;
  stbuf->st_atime = defaults_.st_atime;
}

// Full entries store all three timestamps per entry.
template <>
void dir_reader_<dir_entry_ug_time>::gettimes(const dir_entry* de,
                                              struct ::stat* stbuf) const {
  auto const* full = reinterpret_cast<const dir_entry_ug_time*>(de);
  stbuf->st_ctime = full->ctime;
  stbuf->st_mtime = full->mtime;
  stbuf->st_atime = full->atime;
}
// Instantiates the dir_reader_ specialization that matches `de_type`.
// All arguments are forwarded unchanged to the reader's constructor.
// Throws std::runtime_error if `de_type` is not one of the known layouts.
std::shared_ptr<dir_reader>
dir_reader::create(dir_entry_type de_type, const struct ::stat& defaults,
                   const char* data, int inode_offset) {
  if (de_type == dir_entry_type::DIR_ENTRY) {
    return std::make_shared<dir_reader_<dir_entry>>(defaults, data,
                                                    inode_offset);
  }

  if (de_type == dir_entry_type::DIR_ENTRY_UG) {
    return std::make_shared<dir_reader_<dir_entry_ug>>(defaults, data,
                                                       inode_offset);
  }

  if (de_type == dir_entry_type::DIR_ENTRY_UG_TIME) {
    return std::make_shared<dir_reader_<dir_entry_ug_time>>(defaults, data,
                                                            inode_offset);
  }

  throw std::runtime_error("unknown dir_entry_type");
}
// TODO: move out of here
// Implementation of metadata::impl on top of a single in-memory metadata
// buffer. All pointers handed out (dir_entry, directory, chunk_type) point
// into `data_`, so they stay valid only as long as this object lives.
template <typename LoggerPolicy>
class metadata_ : public metadata::impl {
 public:
  // Takes ownership of `meta` and parses its sections. `defaults` may be null,
  // in which case metadata::get_stat_defaults() is used. `inode_offset` is the
  // difference between external and stored inode numbers.
  metadata_(logger& lgr, std::vector<uint8_t>&& meta,
            const struct ::stat* defaults, int inode_offset)
      : data_(std::move(meta))
      , inode_offset_(inode_offset)
      , log_(lgr) {
    parse(defaults);
  }

  size_t size() const override { return data_.size(); }

  bool empty() const override { return data_.empty(); }

  size_t block_size() const override {
    return size_t{1} << cfg_->block_size_bits;
  }

  unsigned block_size_bits() const override { return cfg_->block_size_bits; }

  void dump(std::ostream& os,
            std::function<void(const std::string&, uint32_t)> const& icb)
      const override;

  void walk(std::function<void(const dir_entry*)> const& func) const override;

  const dir_entry* find(const char* path) const override;
  const dir_entry* find(int inode) const override;
  const dir_entry* find(int inode, const char* name) const override;

  int getattr(const dir_entry* de, struct ::stat* stbuf) const override;

  int access(const dir_entry* de, int mode, uid_t uid,
             gid_t gid) const override;

  const directory* opendir(const dir_entry* de) const override;

  const dir_entry*
  readdir(const directory* d, size_t offset, std::string* name) const override;

  // Number of entries as seen by readdir(): the stored ones plus the
  // synthesized '.' and '..'.
  size_t dirsize(const directory* d) const override {
    return d->count + 2;
  }

  int readlink(const dir_entry* de, char* buf, size_t size) const override;
  int readlink(const dir_entry* de, std::string* buf) const override;

  int statvfs(struct ::statvfs* stbuf) const override;

  int open(const dir_entry* de) const override;

  const chunk_type* get_chunks(int inode, size_t& num) const override;

 private:
  void dump(std::ostream& os, const std::string& indent, const dir_entry* de,
            std::function<void(const std::string&, uint32_t)> const& icb) const;
  void dump(std::ostream& os, const std::string& indent, const directory* dir,
            std::function<void(const std::string&, uint32_t)> const& icb) const;

  void walk(const dir_entry* de,
            std::function<void(const dir_entry*)> const& func) const;

  std::string modestring(const dir_entry* de) const;

  // Copies the entry's name out of the metadata buffer.
  std::string name(const dir_entry* de) const {
    const char* const chars = as<char>(de->name_offset);
    return std::string(chars, de->name_size);
  }

  // Size to report for an entry: stored size for regular files, target length
  // for symlinks, zero for everything else.
  size_t filesize(const dir_entry* de) const {
    if (S_ISREG(de->mode)) {
      return de->u.file_size;
    }

    if (S_ISLNK(de->mode)) {
      return linksize(de);
    }

    return 0;
  }

  // A symlink is stored as a uint16_t length followed by the target bytes.
  size_t linksize(const dir_entry* de) const {
    return *as<uint16_t>(de->u.offset);
  }

  std::string linkname(const dir_entry* de) const {
    return std::string(linkptr(de), linksize(de));
  }

  const char* linkptr(const dir_entry* de) const {
    return as<char>(de->u.offset + sizeof(uint16_t));
  }

  // Interprets the entry's offset as the location of its directory record;
  // only meaningful for directory entries.
  const directory* getdir(const dir_entry* de) const {
    return as<directory>(de->u.offset);
  }

  // Interprets the metadata bytes at `offset` as a `T`.
  template <typename T>
  const T* as(size_t offset = 0) const {
    auto const* base = reinterpret_cast<const char*>(data_.data());
    return reinterpret_cast<const T*>(base + offset);
  }

  // Maps an external inode number to its entry, or nullptr if out of range.
  const dir_entry* get_entry(int inode) const {
    int const idx = inode - inode_offset_;

    if (idx < 0 || idx >= static_cast<int>(cfg_->inode_count)) {
      return nullptr;
    }

    return as<dir_entry>(inode_index_[idx]);
  }

  void parse(const struct ::stat* defaults);

  std::vector<uint8_t> data_;
  const uint32_t* chunk_index_ = nullptr;
  const uint32_t* inode_index_ = nullptr;
  const dir_entry* root_ = nullptr;
  const meta_config* cfg_ = nullptr;
  const int inode_offset_;
  std::shared_ptr<dir_reader> dir_reader_;
  log_proxy<LoggerPolicy> log_;
};
// Walks the section list in `data_`, records where the chunk index, inode
// index and configuration live, and sets up `root_` and `dir_reader_`.
//
// `defaults` supplies attribute defaults for entry layouts that do not store
// them; if null, metadata::get_stat_defaults() is used.
//
// Throws std::runtime_error if a section is truncated, compressed or of an
// unknown type, or if the configuration or inode index section is missing.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::parse(const struct ::stat* defaults) {
  size_t offset = 0;

  while (offset + sizeof(section_header) <= size()) {
    const section_header* sh = as<section_header>(offset);

    log_.debug() << "section_header@" << offset << " (" << sh->to_string()
                 << ")";

    offset += sizeof(section_header);

    // `offset <= size()` holds here, so comparing against the remaining bytes
    // cannot wrap around the way `offset + sh->length` could for a bogus
    // length.
    if (sh->length > size() - offset) {
      throw std::runtime_error("truncated metadata");
    }

    if (sh->compression != compression_type::NONE) {
      throw std::runtime_error("unsupported metadata compression type");
    }

    switch (sh->type) {
    case section_type::META_TABLEDATA:
    case section_type::META_DIRECTORIES:
      // ok, ignore
      break;

    case section_type::META_CHUNK_INDEX:
      chunk_index_ = as<uint32_t>(offset);
      break;

    case section_type::META_INODE_INDEX:
      inode_index_ = as<uint32_t>(offset);
      break;

    case section_type::META_CONFIG:
      cfg_ = as<meta_config>(offset);
      break;

    default:
      throw std::runtime_error("unknown metadata section");
    }

    offset += sh->length;
  }

  // TODO: more consistency checks (section sizes, index contents)

  if (!cfg_) {
    throw std::runtime_error("no metadata configuration found");
  }

  // The root entry is looked up through the inode index right below; without
  // that section we would dereference a null-based pointer.
  if (!inode_index_) {
    throw std::runtime_error("no metadata inode index found");
  }

  struct ::stat stat_defaults;

  if (defaults) {
    stat_defaults = *defaults;
  } else {
    metadata::get_stat_defaults(&stat_defaults);
  }

  // Rebase the index pointers so they can be indexed by inode number. A
  // missing chunk index is left null instead of doing arithmetic on nullptr.
  if (chunk_index_) {
    chunk_index_ -= cfg_->chunk_index_offset;
  }
  inode_index_ -= cfg_->inode_index_offset;

  root_ = as<dir_entry>(inode_index_[0]);

  dir_reader_ = dir_reader::create(cfg_->de_type, stat_defaults,
                                   reinterpret_cast<const char*>(data_.data()),
                                   inode_offset_);
}
// Renders the entry's mode as a fixed 12-character string: setuid, setgid and
// sticky flags ('U', 'G', 'S') followed by rwx triples for user, group and
// other. Unset bits are shown as '-'.
template <typename LoggerPolicy>
std::string metadata_<LoggerPolicy>::modestring(const dir_entry* de) const {
  struct mode_flag {
    unsigned bit;
    char symbol;
  };

  static constexpr mode_flag flags[] = {
      {S_ISUID, 'U'}, {S_ISGID, 'G'}, {S_ISVTX, 'S'},
      {S_IRUSR, 'r'}, {S_IWUSR, 'w'}, {S_IXUSR, 'x'},
      {S_IRGRP, 'r'}, {S_IWGRP, 'w'}, {S_IXGRP, 'x'},
      {S_IROTH, 'r'}, {S_IWOTH, 'w'}, {S_IXOTH, 'x'},
  };

  std::string text;

  for (auto const& flag : flags) {
    text.push_back((de->mode & flag.bit) ? flag.symbol : '-');
  }

  return text;
}
// Writes a one-line description of `de` to `os` (inode, byte position within
// the metadata buffer, mode string, name) followed by type-specific details:
//   - regular file: size, then `icb` is invoked with a deeper indent and the
//     entry's inode
//   - directory: byte position of the directory record, then its entries
//   - symlink: the link target
// Anything else is reported as "(unknown type)".
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::dump(
    std::ostream& os, const std::string& indent, const dir_entry* de,
    std::function<void(const std::string&, uint32_t)> const& icb) const {
  os << indent << "<" << de->inode << ":"
     << (reinterpret_cast<const uint8_t*>(de) - data_.data()) << "> "
     << modestring(de) << " " << name(de);

  if (S_ISREG(de->mode)) {
    os << " " << filesize(de) << "\n";
    icb(indent + " ", de->inode);
  } else if (S_ISDIR(de->mode)) {
    auto dir = getdir(de);
    os << " => " << (reinterpret_cast<const uint8_t*>(dir) - data_.data())
       << "\n";
    // `icb` is a const reference; pass it on as-is (a std::move here would be
    // a no-op and only suggest the callback is consumed).
    dump(os, indent + " ", dir, icb);
  } else if (S_ISLNK(de->mode)) {
    os << " -> " << linkname(de) << "\n";
  } else {
    os << " (unknown type)\n";
  }
}
// Writes the entry count of `dir` to `os`, then dumps each stored entry at the
// same indent level.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::dump(
    std::ostream& os, const std::string& indent, const directory* dir,
    std::function<void(const std::string&, uint32_t)> const& icb) const {
  os << indent << "(" << dir->count << ") entries\n";

  size_t idx = 0;

  while (idx < dir->count) {
    const dir_entry* entry = dir_reader_->readdir(dir, idx);
    dump(os, indent, entry, icb);
    ++idx;
  }
}
// Public entry point: dumps the whole tree to `os`, starting at the root entry
// with an empty indent. `icb` is passed through to the per-entry dump, which
// invokes it for regular files.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::dump(
std::ostream& os,
std::function<void(const std::string&, uint32_t)> const& icb) const {
dump(os, "", root_, icb);
}
// Pre-order traversal: calls `func` for `de` and then recurses into each of
// its children if `de` is a directory.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::walk(
    const dir_entry* de,
    std::function<void(const dir_entry*)> const& func) const {
  func(de);

  if (!S_ISDIR(de->mode)) {
    return;
  }

  auto const* children = getdir(de);

  for (size_t idx = 0; idx < children->count; ++idx) {
    walk(dir_reader_->readdir(children, idx), func);
  }
}
// Public entry point: walks the whole tree starting at the root entry, calling
// `func` for every entry visited.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::walk(
std::function<void(const dir_entry*)> const& func) const {
walk(root_, func);
}
// Resolves a '/'-separated `path` relative to the root entry.
//
// Leading slashes are ignored; an empty path (or just "/") yields the root.
// Returns nullptr if any component does not exist or if the path tries to
// descend through an entry that is not a directory.
template <typename LoggerPolicy>
const dir_entry* metadata_<LoggerPolicy>::find(const char* path) const {
  while (*path == '/') {
    ++path;
  }

  const dir_entry* de = root_;

  while (*path) {
    // Only directories have a directory record; for files and symlinks the
    // entry's offset/size union means something else entirely, so getdir()
    // must not be used on them.
    if (!S_ISDIR(de->mode)) {
      return nullptr;
    }

    const char* next = ::strchr(path, '/');
    size_t clen = next ? static_cast<size_t>(next - path) : ::strlen(path);

    de = dir_reader_->find(getdir(de), path, clen);

    if (!de) {
      break;
    }

    // skip the component and, if present, the separator following it
    path = next ? next + 1 : path + clen;
  }

  return de;
}
// Looks up an entry by its (external) inode number; returns nullptr if the
// number is outside the range known to get_entry().
template <typename LoggerPolicy>
const dir_entry* metadata_<LoggerPolicy>::find(int inode) const {
return get_entry(inode);
}
// Looks up the child called `name` inside the directory with inode number
// `inode`.
//
// Returns nullptr if the inode is unknown, if it does not refer to a
// directory, or if the directory has no entry with that name.
template <typename LoggerPolicy>
const dir_entry*
metadata_<LoggerPolicy>::find(int inode, const char* name) const {
  const dir_entry* parent = get_entry(inode);

  // getdir() is only valid for directories; for other entry types the
  // underlying union holds a file size or link offset instead.
  if (!parent || !S_ISDIR(parent->mode)) {
    return nullptr;
  }

  return dir_reader_->find(getdir(parent), name, ::strlen(name));
}
// Fills `stbuf` with the attributes of `de`. The buffer is zeroed first so
// that fields the directory reader does not set are well-defined.
// Always returns 0.
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::getattr(const dir_entry* de,
struct ::stat* stbuf) const {
::memset(stbuf, 0, sizeof(*stbuf));
dir_reader_->getattr(de, stbuf, filesize(de));
return 0;
}
// Permission check for `de`; forwards to the directory reader and returns its
// result unchanged.
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::access(const dir_entry* de, int mode, uid_t uid,
gid_t gid) const {
return dir_reader_->access(de, mode, uid, gid);
}
// Returns the directory record for `de`, or nullptr if `de` is not a
// directory.
template <typename LoggerPolicy>
const directory* metadata_<LoggerPolicy>::opendir(const dir_entry* de) const {
  if (!S_ISDIR(de->mode)) {
    return nullptr;
  }

  return getdir(de);
}
// "Opens" an entry: for regular files the stored inode number serves as the
// handle; any other entry type yields -1.
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::open(const dir_entry* de) const {
  if (!S_ISREG(de->mode)) {
    return -1;
  }

  return de->inode;
}
// Returns the entry at position `offset` of directory `d`, where positions 0
// and 1 are the synthesized "." and ".." entries and the stored entries start
// at position 2. If `name` is non-null it receives the entry's name.
// Returns nullptr once `offset` is past the last entry.
template <typename LoggerPolicy>
const dir_entry*
metadata_<LoggerPolicy>::readdir(const directory* d, size_t offset,
                                 std::string* name) const {
  if (offset == 0) {
    if (name) {
      name->assign(".");
    }
    return as<dir_entry>(d->self);
  }

  if (offset == 1) {
    if (name) {
      name->assign("..");
    }
    return as<dir_entry>(d->parent);
  }

  // translate into an index into the stored entries
  size_t const idx = offset - 2;

  if (idx >= d->count) {
    return nullptr;
  }

  return dir_reader_->readdir(d, idx, name);
}
// Copies the target of symlink `de` into `buf`, which has room for `size`
// bytes including the terminating NUL.
//
// Since the return value carries no length, the result is always
// NUL-terminated when `size > 0`; a target that does not fit is truncated to
// `size - 1` bytes. With `size == 0` nothing is written.
//
// Returns 0 on success and -EINVAL if `de` is not a symlink.
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::readlink(const dir_entry* de, char* buf,
                                      size_t size) const {
  if (!S_ISLNK(de->mode)) {
    return -EINVAL;
  }

  if (size == 0) {
    // no room for anything, not even the terminator
    return 0;
  }

  // Reserve the last byte for the terminator so callers never see an
  // unterminated string, even when the target is truncated.
  size_t const copy_len = std::min(linksize(de), size - 1);

  ::memcpy(buf, linkptr(de), copy_len);
  buf[copy_len] = '\0';

  return 0;
}
// Stores the target of symlink `de` in `*buf`.
// Returns 0 on success and -EINVAL if `de` is not a symlink, in which case
// `*buf` is left untouched.
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::readlink(const dir_entry* de,
                                      std::string* buf) const {
  if (!S_ISLNK(de->mode)) {
    return -EINVAL;
  }

  size_t const target_len = linksize(de);
  buf->assign(linkptr(de), target_len);

  return 0;
}
// Fills `stbuf` with file system statistics taken from the metadata
// configuration. The fragment size is set to 1, so `f_blocks` carries the
// configured `orig_fs_size` value directly. All fields not set here are zero.
// Always returns 0.
template <typename LoggerPolicy>
int metadata_<LoggerPolicy>::statvfs(struct ::statvfs* stbuf) const {
  ::memset(stbuf, 0, sizeof(*stbuf));

  stbuf->f_flag = ST_RDONLY;
  stbuf->f_namemax = PATH_MAX;

  stbuf->f_bsize = 1UL << cfg_->block_size_bits;
  stbuf->f_frsize = 1UL;
  stbuf->f_blocks = cfg_->orig_fs_size;
  stbuf->f_files = cfg_->inode_count;

  return 0;
}
// Returns the chunk list of the file with (external) inode number `inode` and
// stores the number of chunks in `num`.
//
// The chunk index holds byte offsets into the metadata buffer; the end of a
// file's list is the start of the next inode's list. Returns nullptr, leaving
// `num` untouched, if the inode is outside the range covered by the index.
template <typename LoggerPolicy>
const chunk_type*
metadata_<LoggerPolicy>::get_chunks(int inode, size_t& num) const {
  int const idx = inode - inode_offset_;

  bool const in_range = idx >= static_cast<int>(cfg_->chunk_index_offset) &&
                        idx < static_cast<int>(cfg_->inode_count);

  if (!in_range) {
    return nullptr;
  }

  uint32_t const begin = chunk_index_[idx];
  uint32_t const end = chunk_index_[idx + 1];

  num = (end - begin) / sizeof(chunk_type);

  return as<chunk_type>(begin);
}
// Initializes `*defaults` with the values used when no explicit defaults are
// given: the effective uid/gid of the calling process and the current time
// for all three timestamps. Every other field is zeroed.
void metadata::get_stat_defaults(struct ::stat* defaults) {
  ::memset(defaults, 0, sizeof(struct ::stat));

  time_t const now = ::time(nullptr);
  defaults->st_ctime = now;
  defaults->st_mtime = now;
  defaults->st_atime = now;

  defaults->st_uid = ::geteuid();
  defaults->st_gid = ::getegid();
}
// Constructs the metadata object by creating the logging-policy specific
// metadata_ implementation; `data` is moved into it, `defaults` and
// `inode_offset` are forwarded unchanged.
metadata::metadata(logger& lgr, std::vector<uint8_t>&& data,
const struct ::stat* defaults, int inode_offset)
: impl_(make_unique_logging_object<metadata::impl, metadata_,
logger_policies>(
lgr, std::move(data), defaults, inode_offset)) {}
} // namespace dwarfs

View File

@ -24,7 +24,7 @@
#include <folly/Conv.h>
#include <folly/String.h>
#include "dwarfs/filesystem.h"
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/mmap.h"
#include "dwarfs/options.h"
@ -85,16 +85,19 @@ int dwarfsbench(int argc, char** argv) {
bco.num_workers = num_workers;
bco.decompress_ratio = folly::to<double>(decompress_ratio_str);
dwarfs::filesystem fs(lgr, std::make_shared<dwarfs::mmap>(filesystem), bco);
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem), bco);
worker_group wg("reader", num_readers);
fs.walk([&](const dir_entry* de) {
if (S_ISREG(de->mode)) {
wg.add_job([&fs, de] {
std::vector<char> buf(de->u.file_size);
int fh = fs.open(de);
fs.read(fh, buf.data(), buf.size(), 0);
fs.walk([&](auto entry) {
if (S_ISREG(entry.mode())) {
wg.add_job([&fs, entry] {
struct ::stat stbuf;
if (fs.getattr(entry, &stbuf) == 0) {
std::vector<char> buf(stbuf.st_size);
int fh = fs.open(entry);
fs.read(fh, buf.data(), buf.size());
}
});
}
});