Add support for storing char/block devices, pipes, sockets

Char/block devices won't work on FUSE filesystems by design, but
they can still be e.g. rsync'd from the mounted image to a "real"
file system.

Pipes should work just fine; sockets are currently untested.
This commit is contained in:
Marcus Holland-Moritz 2020-12-02 13:03:22 +01:00
parent a9b1f3fabe
commit 0a248ecbf6
8 changed files with 274 additions and 48 deletions

View File

@ -225,6 +225,7 @@ list(
add_library(
thrift_light
${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/FieldRef.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/CompactProtocol.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/BinaryProtocol.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fbthrift/thrift/lib/cpp2/protocol/DebugProtocol.cpp

View File

@ -46,6 +46,7 @@ class metadata;
class file;
class link;
class dir;
class device;
class inode;
class os_access;
class progress;
@ -55,13 +56,14 @@ class entry_visitor {
public:
virtual ~entry_visitor() = default;
virtual void visit(file* p) = 0;
virtual void visit(device* p) = 0;
virtual void visit(link* p) = 0;
virtual void visit(dir* p) = 0;
};
class entry : public file_interface {
public:
enum type_t { E_FILE, E_DIR, E_LINK };
enum type_t { E_FILE, E_DIR, E_LINK, E_DEVICE, E_OTHER };
entry(const std::string& name, std::shared_ptr<entry> parent,
const struct ::stat& st);
@ -156,6 +158,25 @@ class link : public entry {
uint32_t inode_{0};
};
/**
 * Entry type for every filesystem object that is not a regular file,
 * a directory or a symbolic link.
 *
 * `type()` separates character/block devices from the remaining
 * special files by inspecting the stat mode of the entry.
 */
class device : public entry {
 public:
  // Re-use all constructors of `entry`.
  using entry::entry;

  // Classification of this entry.
  type_t type() const override;

  // Visitor dispatch and scan hook.
  void accept(entry_visitor& v, bool preorder) override;
  void scan(os_access& os, progress& prog) override;

  // Inode number handling; the stored number is 0 until set_inode() runs.
  void set_inode(uint32_t inode);
  uint32_t inode_num() const override { return inode_; }

  // Device identifier taken from the stat data of this entry.
  uint64_t device_id() const;

 private:
  uint32_t inode_{0};
};
class entry_factory {
public:
static std::unique_ptr<entry_factory> create(bool with_similarity = false);

View File

@ -58,6 +58,7 @@ class progress {
std::atomic<size_t> dirs_scanned{0};
std::atomic<size_t> links_found{0};
std::atomic<size_t> links_scanned{0};
std::atomic<size_t> specials_found{0};
std::atomic<size_t> duplicate_files{0};
std::atomic<size_t> block_count{0};
std::atomic<size_t> chunk_count{0};

View File

@ -73,6 +73,10 @@ std::string entry::type_string() const {
return "link";
case E_DIR:
return "dir";
case E_DEVICE:
return "device";
case E_OTHER:
return "pipe/socket";
default:
throw std::runtime_error("invalid file type");
}
@ -220,6 +224,19 @@ void link::scan(os_access& os, progress& prog) {
prog.original_size += size();
}
// Reports E_DEVICE for character and block devices and E_OTHER for any
// other mode.
entry::type_t device::type() const {
  auto const file_mode = status().st_mode;
  if (S_ISCHR(file_mode) || S_ISBLK(file_mode)) {
    return E_DEVICE;
  }
  return E_OTHER;
}
// Stores the inode number of this entry.
void device::set_inode(uint32_t inode) {
  inode_ = inode;
}

// Hands this entry to the visitor; the second (unnamed) argument is ignored.
void device::accept(entry_visitor& v, bool) {
  v.visit(this);
}

// Does nothing; both arguments are unused.
void device::scan(os_access&, progress&) {
}

// Returns the `st_rdev` field of this entry's stat data.
uint64_t device::device_id() const {
  return status().st_rdev;
}
class entry_factory_ : public entry_factory {
public:
entry_factory_(bool with_similarity)
@ -231,14 +248,18 @@ class entry_factory_ : public entry_factory {
struct ::stat st;
os.lstat(p, &st);
auto mode = st.st_mode;
if (S_ISREG(st.st_mode)) {
if (S_ISREG(mode)) {
return std::make_shared<file>(name, std::move(parent), st,
with_similarity_);
} else if (S_ISDIR(st.st_mode)) {
} else if (S_ISDIR(mode)) {
return std::make_shared<dir>(name, std::move(parent), st);
} else if (S_ISLNK(st.st_mode)) {
} else if (S_ISLNK(mode)) {
return std::make_shared<link>(name, std::move(parent), st);
} else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
S_ISSOCK(mode)) {
return std::make_shared<device>(name, std::move(parent), st);
} else {
// TODO: warn
}

View File

@ -34,6 +34,8 @@
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
#include <fmt/format.h>
#include "dwarfs/logger.h"
#include "dwarfs/metadata_v2.h"
@ -103,11 +105,12 @@ class metadata_ : public metadata_v2::impl {
, root_(meta_.entries()[meta_.entry_index()[0]], &meta_)
, log_(lgr)
, inode_offset_(inode_offset)
, chunk_index_offset_(
find_index_offset(meta_.entry_index().size(),
[](uint16_t mode) { return S_ISREG(mode); }))
, link_index_offset_(find_index_offset(
chunk_index_offset_, [](uint16_t mode) { return S_ISLNK(mode); })) {
, link_index_offset_(find_index_offset(inode_rank::INO_LNK))
, chunk_index_offset_(find_index_offset(inode_rank::INO_REG))
, dev_index_offset_(find_index_offset(inode_rank::INO_DEV)) {
log_.debug() << "link index offset: " << link_index_offset_;
log_.debug() << "chunk index offset: " << chunk_index_offset_;
log_.debug() << "device index offset: " << dev_index_offset_;
}
void dump(std::ostream& os, int detail_level,
@ -159,15 +162,63 @@ class metadata_ : public metadata_v2::impl {
return make_entry_view(meta_.entry_index()[inode]);
}
template <typename Func>
size_t find_index_offset(size_t last, Func&& func) const {
auto range = boost::irange(size_t(0), last);
// This represents the order in which inodes are stored in entry_index
//
// The enumerator order is significant: find_index_offset() compares ranks
// with `<`, so reordering the enumerators changes the computed offsets.
enum class inode_rank {
INO_DIR, // S_IFDIR
INO_LNK, // S_IFLNK
INO_REG, // S_IFREG
INO_DEV, // S_IFBLK and S_IFCHR
INO_OTH, // S_IFSOCK and S_IFIFO
};
auto it =
std::upper_bound(range.begin(), range.end(), 0, [&](int, auto inode) {
auto e = make_entry_view_from_inode(inode);
return bool(func(e.mode()));
});
// Maps the file type bits of `mode` to the rank used for ordering inodes.
//
// Throws std::runtime_error when the file type bits match none of the
// handled types.
static inode_rank get_inode_rank(uint16_t mode) {
  auto const type_bits = mode & S_IFMT;

  if (type_bits == S_IFDIR) {
    return inode_rank::INO_DIR;
  }
  if (type_bits == S_IFLNK) {
    return inode_rank::INO_LNK;
  }
  if (type_bits == S_IFREG) {
    return inode_rank::INO_REG;
  }
  if (type_bits == S_IFBLK || type_bits == S_IFCHR) {
    return inode_rank::INO_DEV;
  }
  if (type_bits == S_IFSOCK || type_bits == S_IFIFO) {
    return inode_rank::INO_OTH;
  }

  throw std::runtime_error(fmt::format("unknown file type: {:#06x}", mode));
}
// Returns the single-character label for the file type encoded in `mode`:
// 'd' directory, 'l' symlink, '-' regular file, 'b' block device,
// 'c' character device, 's' socket, 'p' named pipe.
//
// Throws std::runtime_error when the file type bits match none of these.
static char get_filetype_label(uint16_t mode) {
  if (S_ISDIR(mode)) {
    return 'd';
  }
  if (S_ISLNK(mode)) {
    return 'l';
  }
  if (S_ISREG(mode)) {
    return '-';
  }
  if (S_ISBLK(mode)) {
    return 'b';
  }
  if (S_ISCHR(mode)) {
    return 'c';
  }
  if (S_ISSOCK(mode)) {
    return 's';
  }
  if (S_ISFIFO(mode)) {
    return 'p';
  }

  throw std::runtime_error(fmt::format("unknown file type: {:#06x}", mode));
}
// Binary-searches the inode range [0, entry_index().size()) for the first
// inode whose rank is not less than `rank` and returns that inode number.
//
// NOTE(review): when no inode qualifies the search yields the end iterator,
// which is still dereferenced here; presumably this evaluates to
// entry_index().size() for a boost::irange iterator -- confirm.
size_t find_index_offset(inode_rank rank) const {
  auto inodes = boost::irange(size_t(0), meta_.entry_index().size());

  // True while the inode's rank sorts strictly before the wanted rank.
  auto ranks_before = [&](auto inode, inode_rank wanted) {
    auto view = make_entry_view_from_inode(inode);
    return get_inode_rank(view.mode()) < wanted;
  };

  auto pos =
      std::lower_bound(inodes.begin(), inodes.end(), rank, ranks_before);

  return *pos;
}
@ -226,13 +277,22 @@ class metadata_ : public metadata_v2::impl {
.links()[meta_.link_index()[entry.inode() - link_index_offset_]];
}
// Looks up the device id stored for `inode` in the metadata's device list,
// indexed by (inode - dev_index_offset_).
//
// If the metadata carries no device list, an error is logged and 0 is
// returned. No range check is performed on the computed index.
uint64_t get_device_id(int inode) const {
  auto devs = meta_.devices();

  if (!devs) {
    log_.error() << "get_device_id() called, but no devices in file system";
    return 0;
  }

  return (*devs)[inode - dev_index_offset_];
}
folly::ByteRange data_;
MappedFrozen<thrift::metadata::metadata> meta_;
entry_view root_;
log_proxy<LoggerPolicy> log_;
const int inode_offset_;
const int chunk_index_offset_;
const int link_index_offset_;
const int chunk_index_offset_;
const int dev_index_offset_;
};
template <typename LoggerPolicy>
@ -262,8 +322,14 @@ void metadata_<LoggerPolicy>::dump(
std::move(icb));
} else if (S_ISLNK(mode)) {
os << " -> " << link_value(entry) << "\n";
} else {
os << " (unknown type)\n";
} else if (S_ISBLK(mode)) {
os << " (block device: " << get_device_id(inode) << ")\n";
} else if (S_ISCHR(mode)) {
os << " (char device: " << get_device_id(inode) << ")\n";
} else if (S_ISFIFO(mode)) {
os << " (named pipe)\n";
} else if (S_ISSOCK(mode)) {
os << " (socket)\n";
}
}
@ -315,7 +381,7 @@ std::string metadata_<LoggerPolicy>::modestring(uint16_t mode) const {
oss << (mode & S_ISUID ? 'U' : '-');
oss << (mode & S_ISGID ? 'G' : '-');
oss << (mode & S_ISVTX ? 'S' : '-');
oss << (S_ISDIR(mode) ? 'd' : S_ISLNK(mode) ? 'l' : '-');
oss << get_filetype_label(mode);
oss << (mode & S_IRUSR ? 'r' : '-');
oss << (mode & S_IWUSR ? 'w' : '-');
oss << (mode & S_IXUSR ? 'x' : '-');
@ -419,10 +485,11 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
auto mode = entry.mode();
auto timebase = meta_.timestamp_base();
auto inode = entry.inode();
stbuf->st_mode = mode;
stbuf->st_size = file_size(entry, mode);
stbuf->st_ino = entry.inode() + inode_offset_;
stbuf->st_ino = inode + inode_offset_;
stbuf->st_blocks = (stbuf->st_size + 511) / 512;
stbuf->st_uid = entry.getuid();
stbuf->st_gid = entry.getgid();
@ -430,6 +497,10 @@ int metadata_<LoggerPolicy>::getattr(entry_view entry,
stbuf->st_mtime = timebase + entry.mtime_offset();
stbuf->st_ctime = timebase + entry.ctime_offset();
if (S_ISBLK(mode) || S_ISCHR(mode)) {
stbuf->st_rdev = get_device_id(inode);
}
return 0;
}

View File

@ -64,6 +64,7 @@ class visitor_base : public entry_visitor {
void visit(file*) override {}
void visit(link*) override {}
void visit(dir*) override {}
void visit(device*) override {}
};
class scan_files_visitor : public visitor_base {
@ -150,6 +151,40 @@ class link_set_inode_visitor : public visitor_base {
uint32_t& inode_no_;
};
// Visitor that numbers every entry whose type() is E_DEVICE and records
// its device id, in visiting order.
//
// The inode counter is held by reference, so the caller's variable is
// advanced once for each numbered entry.
class device_set_inode_visitor : public visitor_base {
 public:
  device_set_inode_visitor(uint32_t& inode_no)
      : inode_no_(inode_no) {}

  void visit(device* p) override {
    if (p->type() != entry::E_DEVICE) {
      return;
    }

    auto const assigned = inode_no_++;
    p->set_inode(assigned);
    dev_ids_.push_back(p->device_id());
  }

  // Device ids collected so far, one per numbered entry.
  std::vector<uint64_t>& device_ids() { return dev_ids_; }

 private:
  std::vector<uint64_t> dev_ids_;
  uint32_t& inode_no_;
};
// Visitor that numbers every `device` entry whose type() is not E_DEVICE.
//
// The inode counter is held by reference, so the caller's variable is
// advanced once for each numbered entry.
class pipe_set_inode_visitor : public visitor_base {
 public:
  pipe_set_inode_visitor(uint32_t& inode_no)
      : inode_no_(inode_no) {}

  void visit(device* p) override {
    if (p->type() == entry::E_DEVICE) {
      return;
    }

    auto const assigned = inode_no_++;
    p->set_inode(assigned);
  }

 private:
  uint32_t& inode_no_;
};
class names_and_links_visitor : public entry_visitor {
public:
names_and_links_visitor(global_entry_data& data)
@ -157,6 +192,8 @@ class names_and_links_visitor : public entry_visitor {
void visit(file* p) override { data_.add_name(p->name()); }
void visit(device* p) override { data_.add_name(p->name()); }
void visit(link* p) override {
data_.add_name(p->name());
data_.add_link(p->linkname());
@ -339,6 +376,12 @@ scanner_<LoggerPolicy>::scan_tree(const std::string& path, progress& prog) {
prog.links_scanned++;
break;
case entry::E_DEVICE:
case entry::E_OTHER:
prog.specials_found++;
pe->scan(*os_, prog);
break;
default:
log_.error() << "unsupported entry type: " << int(pe->type());
prog.errors++;
@ -444,13 +487,24 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.info() << "assigning file inodes...";
im->number_inodes(first_file_inode);
log_.info() << "building metadata...";
global_entry_data ge_data(options_);
thrift::metadata::metadata mv2;
mv2.link_index.resize(first_file_inode - first_link_inode);
log_.info() << "assigning device inodes...";
uint32_t first_device_inode = first_file_inode + im->count();
device_set_inode_visitor devsiv(first_device_inode);
root->accept(devsiv);
mv2.devices_ref() = std::move(devsiv.device_ids());
log_.info() << "assigning pipe/socket inodes...";
uint32_t first_pipe_inode = first_device_inode;
pipe_set_inode_visitor pipsiv(first_pipe_inode);
root->accept(pipsiv);
log_.info() << "building metadata...";
wg_.add_job([&] {
log_.info() << "saving names and links...";
names_and_links_visitor nlv(ge_data);
@ -520,7 +574,7 @@ void scanner_<LoggerPolicy>::scan(filesystem_writer& fsw,
log_.debug() << "total number of chunks: " << mv2.chunks.size();
log_.info() << "saving directories...";
mv2.entry_index.resize(first_file_inode + im->count());
mv2.entry_index.resize(first_pipe_inode);
mv2.directories.reserve(first_link_inode + 1);
save_directories_visitor sdv(first_link_inode);
root->accept(sdv);

View File

@ -19,8 +19,8 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <map>
#include <sstream>
#include <unordered_map>
#include <gtest/gtest.h>
@ -67,17 +67,21 @@ struct simplestat {
::uid_t st_uid;
::gid_t st_gid;
::off_t st_size;
::dev_t st_rdev;
};
std::unordered_map<std::string, simplestat> statmap{
{"/", {S_IFDIR | 0777, 1000, 1000, 0}},
{"//test.pl", {S_IFREG | 0644, 1000, 1000, 0}},
{"//somelink", {S_IFLNK | 0777, 1000, 1000, 16}},
{"//somedir", {S_IFDIR | 0777, 1000, 1000, 0}},
{"//foo.pl", {S_IFREG | 0600, 1337, 0, 23456}},
{"//ipsum.txt", {S_IFREG | 0644, 1000, 1000, 2000000}},
{"//somedir/ipsum.py", {S_IFREG | 0644, 1000, 1000, 10000}},
{"//somedir/bad", {S_IFLNK | 0777, 1000, 1000, 6}},
// Stat data of the mocked file system, keyed by path (the root is the
// empty string).
// NOTE(review): the initializers presumably map to
// {st_mode, st_uid, st_gid, st_size, st_rdev} -- confirm against simplestat.
std::map<std::string, simplestat> statmap{
{"", {S_IFDIR | 0777, 1000, 100, 0, 0}},
{"/test.pl", {S_IFREG | 0644, 1000, 100, 0, 0}},
{"/somelink", {S_IFLNK | 0777, 1000, 100, 16, 0}},
{"/somedir", {S_IFDIR | 0777, 1000, 100, 0, 0}},
{"/foo.pl", {S_IFREG | 0600, 1337, 0, 23456, 0}},
{"/ipsum.txt", {S_IFREG | 0644, 1000, 100, 2000000, 0}},
{"/somedir/ipsum.py", {S_IFREG | 0644, 1000, 100, 10000, 0}},
{"/somedir/bad", {S_IFLNK | 0777, 1000, 100, 6, 0}},
{"/somedir/pipe", {S_IFIFO | 0644, 1000, 100, 0, 0}},
// 259 == 0x103 and 261 == 0x105; presumably major 1 with minor 3 and 5
// in the Linux dev_t encoding -- confirm.
{"/somedir/null", {S_IFCHR | 0666, 0, 0, 0, 259}},
{"/somedir/zero", {S_IFCHR | 0666, 0, 0, 0, 261}},
};
} // namespace
@ -95,18 +99,15 @@ class mmap_mock : public mmif {
class os_access_mock : public os_access {
public:
std::shared_ptr<dir_reader> opendir(const std::string& path) const override {
if (path == "/") {
if (path.empty()) {
std::vector<std::string> files{
".", "..", "test.pl", "somelink", "somedir", "foo.pl", "ipsum.txt",
};
return std::make_shared<dir_reader_mock>(std::move(files));
} else if (path == "//somedir") {
} else if (path == "/somedir") {
std::vector<std::string> files{
".",
"..",
"ipsum.py",
"bad",
".", "..", "ipsum.py", "bad", "pipe", "null", "zero",
};
return std::make_shared<dir_reader_mock>(std::move(files));
@ -125,12 +126,13 @@ class os_access_mock : public os_access {
st->st_atime = 123;
st->st_mtime = 234;
st->st_ctime = 345;
st->st_rdev = sst.st_rdev;
}
std::string readlink(const std::string& path, size_t size) const override {
if (path == "//somelink" && size == 16) {
if (path == "/somelink" && size == 16) {
return "somedir/ipsum.py";
} else if (path == "//somedir/bad" && size == 6) {
} else if (path == "/somedir/bad" && size == 6) {
return "../foo";
}
@ -196,7 +198,7 @@ void basic_end_to_end_test(const std::string& compressor,
block_compressor bc(compressor);
filesystem_writer fsw(oss, lgr, wg, prog, bc, 64 << 20);
s.scan(fsw, "/", prog);
s.scan(fsw, "", prog);
auto mm = std::make_shared<test::mmap_mock>(oss.str());
@ -209,9 +211,10 @@ void basic_end_to_end_test(const std::string& compressor,
struct ::stat st;
ASSERT_TRUE(entry);
EXPECT_EQ(fs.getattr(*entry, &st), 0);
EXPECT_EQ(st.st_size, 23456);
EXPECT_EQ(st.st_uid, 1337);
EXPECT_EQ(st.st_gid, 0);
int inode = fs.open(*entry);
EXPECT_GE(inode, 0);
@ -223,20 +226,57 @@ void basic_end_to_end_test(const std::string& compressor,
entry = fs.find("/somelink");
ASSERT_TRUE(entry);
EXPECT_EQ(fs.getattr(*entry, &st), 0);
EXPECT_EQ(st.st_size, 16);
EXPECT_EQ(st.st_uid, 1000);
EXPECT_EQ(st.st_gid, 100);
EXPECT_EQ(st.st_rdev, 0);
std::string link;
EXPECT_EQ(fs.readlink(*entry, &link), 0);
EXPECT_EQ(link, "somedir/ipsum.py");
EXPECT_FALSE(fs.find("/somedir/nope"));
entry = fs.find("/somedir/bad");
ASSERT_TRUE(entry);
EXPECT_EQ(fs.getattr(*entry, &st), 0);
EXPECT_EQ(st.st_size, 6);
EXPECT_EQ(fs.readlink(*entry, &link), 0);
EXPECT_EQ(link, "../foo");
entry = fs.find("/somedir/pipe");
ASSERT_TRUE(entry);
EXPECT_EQ(fs.getattr(*entry, &st), 0);
EXPECT_EQ(st.st_size, 0);
EXPECT_EQ(st.st_uid, 1000);
EXPECT_EQ(st.st_gid, 100);
EXPECT_TRUE(S_ISFIFO(st.st_mode));
EXPECT_EQ(st.st_rdev, 0);
entry = fs.find("/somedir/null");
ASSERT_TRUE(entry);
EXPECT_EQ(fs.getattr(*entry, &st), 0);
EXPECT_EQ(st.st_size, 0);
EXPECT_EQ(st.st_uid, 0);
EXPECT_EQ(st.st_gid, 0);
EXPECT_TRUE(S_ISCHR(st.st_mode));
EXPECT_EQ(st.st_rdev, 259);
entry = fs.find("/somedir/zero");
ASSERT_TRUE(entry);
EXPECT_EQ(fs.getattr(*entry, &st), 0);
EXPECT_EQ(st.st_size, 0);
EXPECT_EQ(st.st_uid, 0);
EXPECT_EQ(st.st_gid, 0);
EXPECT_TRUE(S_ISCHR(st.st_mode));
EXPECT_EQ(st.st_rdev, 261);
}
std::vector<std::string> const compressions{"null",

View File

@ -104,7 +104,11 @@ struct metadata {
*/
2: required list<directory> directories,
// all entries, can be looked up by inode through entry_index
/**
* All entries, can be looked up by inode through entry_index, or by
* directory through `first_entry`, where the entries will be between
* `directories[n].first_entry` and `directories[n+1].first_entry`.
*/
3: required list<entry> entries,
/**
@ -114,7 +118,17 @@ struct metadata {
*/
4: required list<UInt32> chunk_index,
// entry index, indexed by inode
/**
* Entry index, indexed by inode
*
* This list contains all inodes strictly in the following order:
*
* - directories, starting with the root dir at inode 0
* - symbolic links
* - regular files
* - character and block devices
* - named pipes and sockets
*/
5: required list<UInt32> entry_index,
// link index, indexed by (inode - link_index_offset)
@ -154,4 +168,7 @@ struct metadata {
// total file system size
16: required UInt64 total_fs_size,
// device ids, for lookup by (inode - device_index_offset)
17: optional list<UInt64> devices,
}