fix(mkdwarfs): don't store inodes with inconsistent fragments

This commit is contained in:
Marcus Holland-Moritz 2024-01-16 18:06:21 +01:00
parent 6804aa0f75
commit b11725b495
7 changed files with 54 additions and 14 deletions

10
TODO
View File

@ -1,16 +1,6 @@
- Fragment counts don't necessarily match up in presence
of errors.
- It is possible, though unlikely, that memory mapping
fails only of a subset of fragments, in which case the
representation of the file in the target image would
be wrong. I don't know yet what the best solution to
this would be - the data for the other fragments is
potentially already stored. Might be best to just mark
the fragments as invalid and later patch the metadata
to drop all references to these fragments (i.e. just
build a zero-size file after the fact).
- When opening a file again, check that its timestamp,
size and potentially checksum did not change from
when we first saw it.

View File

@ -65,7 +65,7 @@ class inode : public object {
virtual size_t size() const = 0;
virtual file const* any() const = 0;
virtual files_vector const& all() const = 0;
virtual void
virtual bool
append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0;
virtual inode_fragments& fragments() = 0;
virtual void dump(std::ostream& os, inode_options const& options) const = 0;

View File

@ -52,6 +52,8 @@ class single_inode_fragment {
void extend(file_off_t length) { length_ += length; }
bool chunks_are_consistent() const;
private:
fragment_category category_;
file_off_t length_;

View File

@ -19,6 +19,7 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <numeric>
#include <ostream>
#include <sstream>
@ -45,6 +46,18 @@ void single_inode_fragment::add_chunk(size_t block, size_t offset,
chunks_.push_back(std::move(c));
}
bool single_inode_fragment::chunks_are_consistent() const {
if (length_ > 0 && chunks_.empty()) {
return false;
}
auto total_chunks_len = std::accumulate(
chunks_.begin(), chunks_.end(), file_off_t{0},
[](auto acc, auto const& c) { return acc + c.get_size(); });
return total_chunks_len == length_;
}
std::ostream&
inode_fragments::to_stream(std::ostream& os,
mapper_function_type const& mapper) const {

View File

@ -215,13 +215,19 @@ class inode_ : public inode {
files_vector const& all() const override { return files_; }
void append_chunks_to(std::vector<chunk_type>& vec) const override {
bool append_chunks_to(std::vector<chunk_type>& vec) const override {
for (auto const& frag : fragments_) {
if (!frag.chunks_are_consistent()) {
return false;
}
}
for (auto const& frag : fragments_) {
auto chks = frag.chunks();
if (!chks.empty()) {
vec.insert(vec.end(), chks.begin(), chks.end());
}
}
return true;
}
inode_fragments& fragments() override { return fragments_; }

View File

@ -820,7 +820,14 @@ void scanner_<LoggerPolicy>::scan(
mv2.chunks().value().reserve(prog.chunk_count);
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size();
ino->append_chunks_to(mv2.chunks().value());
if (!ino->append_chunks_to(mv2.chunks().value())) {
std::ostringstream oss;
for (auto fp : ino->all()) {
oss << "\n " << fp->path_as_string();
}
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
<< ", the following files will be empty:" << oss.str();
}
});
blockmgr->map_logical_blocks(mv2.chunks().value());

View File

@ -33,6 +33,7 @@
#include <fmt/format.h>
#include <folly/FileUtil.h>
#include <folly/String.h>
#include <folly/container/Enumerate.h>
#include <folly/json.h>
@ -2262,11 +2263,32 @@ TEST_P(map_file_error_test, delayed) {
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
auto files = t.add_random_file_tree({.avg_size = 64.0,
.dimension = 20,
.max_name_len = 8,
.with_errors = true});
{
std::mt19937_64 rng{42};
for (auto const& p : fs::recursive_directory_iterator(audio_data_dir)) {
if (p.is_regular_file()) {
auto fp = fs::relative(p.path(), audio_data_dir);
std::string contents;
ASSERT_TRUE(folly::readFile(p.path().string().c_str(), contents));
files.emplace_back(fp, std::move(contents));
if (rng() % 2 == 0) {
t.os->set_map_file_error(
fs::path{"/"} / fp,
std::make_exception_ptr(std::runtime_error("map_file_error")),
rng() % 4);
}
}
}
}
t.os->setenv("DWARFS_DUMP_INODES", "inodes.dump");
// t.iol->use_real_terminal(true);
@ -2317,7 +2339,7 @@ TEST_P(map_file_error_test, delayed) {
EXPECT_LE(failed_actual.size(), failed_expected.size());
EXPECT_EQ(8000, files.size());
EXPECT_GT(files.size(), 8000);
EXPECT_GT(num_non_empty, 4000);
// Ensure that files which never had any errors are all present