mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-13 14:27:30 -04:00
feat: add --change-block-size
This commit is contained in:
parent
e5e0a36ea5
commit
e7c35d2402
@ -242,13 +242,19 @@ Most other options are concerned with compression tuning:
|
|||||||
- `--rebuild-metadata`:
|
- `--rebuild-metadata`:
|
||||||
Completely rebuild the metadata block. This will upgrade the internal format
|
Completely rebuild the metadata block. This will upgrade the internal format
|
||||||
of the metadata to the latest version instead of just recompressing the
|
of the metadata to the latest version instead of just recompressing the
|
||||||
metadata block.
|
metadata block. Implies `--recompress=metadata`.
|
||||||
|
|
||||||
|
- `--change-block-size`:
|
||||||
|
Change the block size while recompressing. This will change the block size
|
||||||
|
according to the size given in `--block-size-bits`. Even if the block size
|
||||||
|
is unchanged, this will still re-order and re-compress *all* blocks. Implies
|
||||||
|
`--recompress=all` and `--rebuild-metadata`.
|
||||||
|
|
||||||
- `--recompress-categories=`[`!`]*category*[`,`...]:
|
- `--recompress-categories=`[`!`]*category*[`,`...]:
|
||||||
When `--recompress` is set to `all` or `block`, this option controls
|
When `--recompress` is set to `all` or `block`, this option controls
|
||||||
which categories of blocks will be recompressed. Adding a `!` in front
|
which categories of blocks will be recompressed. Adding a `!` in front
|
||||||
of the list allows you to specify which categories will *not* be
|
of the list allows you to specify which categories will *not* be
|
||||||
recompressed.
|
recompressed. Cannot be used with `--change-block-size`.
|
||||||
|
|
||||||
- `-P`, `--pack-metadata=auto`|`none`|[`all`|`chunk_table`|`directories`|`shared_files`|`names`|`names_index`|`symlinks`|`symlinks_index`|`force`|`plain`[`,`...]]:
|
- `-P`, `--pack-metadata=auto`|`none`|[`all`|`chunk_table`|`directories`|`shared_files`|`names`|`names_index`|`symlinks`|`symlinks_index`|`force`|`plain`[`,`...]]:
|
||||||
Which metadata information to store in packed format. This is primarily
|
Which metadata information to store in packed format. This is primarily
|
||||||
|
@ -487,6 +487,9 @@ class filesystem_v2 final : public filesystem_v2_lite {
|
|||||||
|
|
||||||
std::unique_ptr<thrift::metadata::fs_options> thawed_fs_options() const;
|
std::unique_ptr<thrift::metadata::fs_options> thawed_fs_options() const;
|
||||||
|
|
||||||
|
std::future<block_range>
|
||||||
|
read_raw_block_data(size_t block_no, size_t offset, size_t size) const;
|
||||||
|
|
||||||
class impl : public impl_lite {
|
class impl : public impl_lite {
|
||||||
public:
|
public:
|
||||||
virtual int
|
virtual int
|
||||||
@ -504,6 +507,8 @@ class filesystem_v2 final : public filesystem_v2_lite {
|
|||||||
unpacked_metadata() const = 0;
|
unpacked_metadata() const = 0;
|
||||||
virtual std::unique_ptr<thrift::metadata::fs_options>
|
virtual std::unique_ptr<thrift::metadata::fs_options>
|
||||||
thawed_fs_options() const = 0;
|
thawed_fs_options() const = 0;
|
||||||
|
virtual std::future<block_range>
|
||||||
|
read_raw_block_data(size_t block, size_t offset, size_t size) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -105,6 +105,11 @@ class inode_reader_v2 {
|
|||||||
impl_->cache_blocks(blocks);
|
impl_->cache_blocks(blocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::future<block_range>
|
||||||
|
read_raw_block_data(size_t block_no, size_t offset, size_t size) const {
|
||||||
|
return impl_->read_raw_block_data(block_no, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
public:
|
public:
|
||||||
virtual ~impl() = default;
|
virtual ~impl() = default;
|
||||||
@ -127,6 +132,8 @@ class inode_reader_v2 {
|
|||||||
virtual void set_cache_tidy_config(cache_tidy_config const& cfg) = 0;
|
virtual void set_cache_tidy_config(cache_tidy_config const& cfg) = 0;
|
||||||
virtual size_t num_blocks() const = 0;
|
virtual size_t num_blocks() const = 0;
|
||||||
virtual void cache_blocks(std::span<size_t const> blocks) const = 0;
|
virtual void cache_blocks(std::span<size_t const> blocks) const = 0;
|
||||||
|
virtual std::future<block_range>
|
||||||
|
read_raw_block_data(size_t block_no, size_t offset, size_t size) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -36,6 +36,7 @@ namespace dwarfs::utility {
|
|||||||
struct rewrite_options {
|
struct rewrite_options {
|
||||||
bool recompress_block{false};
|
bool recompress_block{false};
|
||||||
bool recompress_metadata{false};
|
bool recompress_metadata{false};
|
||||||
|
std::optional<size_t> change_block_size;
|
||||||
std::optional<writer::metadata_options> rebuild_metadata;
|
std::optional<writer::metadata_options> rebuild_metadata;
|
||||||
std::unordered_set<std::string> recompress_categories;
|
std::unordered_set<std::string> recompress_categories;
|
||||||
bool recompress_categories_exclude{false};
|
bool recompress_categories_exclude{false};
|
||||||
|
@ -32,6 +32,8 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <folly/Function.h>
|
||||||
|
|
||||||
#include <dwarfs/block_compressor.h>
|
#include <dwarfs/block_compressor.h>
|
||||||
#include <dwarfs/byte_buffer.h>
|
#include <dwarfs/byte_buffer.h>
|
||||||
#include <dwarfs/compression_constraints.h>
|
#include <dwarfs/compression_constraints.h>
|
||||||
@ -48,6 +50,15 @@ class fs_section;
|
|||||||
|
|
||||||
namespace writer::internal {
|
namespace writer::internal {
|
||||||
|
|
||||||
|
struct block_compression_info {
|
||||||
|
size_t uncompressed_size{};
|
||||||
|
std::optional<std::string> metadata;
|
||||||
|
std::optional<compression_constraints> constraints;
|
||||||
|
};
|
||||||
|
|
||||||
|
using delayed_data_fn_type = folly::Function<
|
||||||
|
std::pair<shared_byte_buffer, std::optional<std::string>>()>;
|
||||||
|
|
||||||
class filesystem_writer_detail {
|
class filesystem_writer_detail {
|
||||||
public:
|
public:
|
||||||
virtual ~filesystem_writer_detail() = default;
|
virtual ~filesystem_writer_detail() = default;
|
||||||
@ -81,11 +92,15 @@ class filesystem_writer_detail {
|
|||||||
virtual void write_history(shared_byte_buffer data) = 0;
|
virtual void write_history(shared_byte_buffer data) = 0;
|
||||||
virtual void check_block_compression(
|
virtual void check_block_compression(
|
||||||
compression_type compression, std::span<uint8_t const> data,
|
compression_type compression, std::span<uint8_t const> data,
|
||||||
std::optional<fragment_category::value_type> cat = std::nullopt) = 0;
|
std::optional<fragment_category::value_type> cat = std::nullopt,
|
||||||
|
block_compression_info* info = nullptr) = 0;
|
||||||
virtual void write_section(
|
virtual void write_section(
|
||||||
section_type type, compression_type compression,
|
section_type type, compression_type compression,
|
||||||
std::span<uint8_t const> data,
|
std::span<uint8_t const> data,
|
||||||
std::optional<fragment_category::value_type> cat = std::nullopt) = 0;
|
std::optional<fragment_category::value_type> cat = std::nullopt) = 0;
|
||||||
|
virtual void rewrite_block(
|
||||||
|
delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
|
std::optional<fragment_category::value_type> cat = std::nullopt) = 0;
|
||||||
virtual void write_compressed_section(dwarfs::internal::fs_section const& sec,
|
virtual void write_compressed_section(dwarfs::internal::fs_section const& sec,
|
||||||
std::span<uint8_t const> data) = 0;
|
std::span<uint8_t const> data) = 0;
|
||||||
virtual void flush() = 0;
|
virtual void flush() = 0;
|
||||||
|
@ -49,6 +49,19 @@ class inode_manager;
|
|||||||
class block_manager;
|
class block_manager;
|
||||||
class dir;
|
class dir;
|
||||||
|
|
||||||
|
struct block_chunk {
|
||||||
|
size_t block{};
|
||||||
|
size_t offset{};
|
||||||
|
size_t size{};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct block_mapping {
|
||||||
|
size_t old_block{};
|
||||||
|
std::vector<block_chunk> chunks{};
|
||||||
|
|
||||||
|
std::vector<block_chunk> map_chunk(size_t offset, size_t size) const;
|
||||||
|
};
|
||||||
|
|
||||||
class metadata_builder {
|
class metadata_builder {
|
||||||
public:
|
public:
|
||||||
// Start with empty metadata
|
// Start with empty metadata
|
||||||
@ -125,6 +138,10 @@ class metadata_builder {
|
|||||||
impl_->gather_global_entry_data(ge_data);
|
impl_->gather_global_entry_data(ge_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void remap_blocks(std::span<block_mapping const> mapping) {
|
||||||
|
impl_->remap_blocks(mapping);
|
||||||
|
}
|
||||||
|
|
||||||
thrift::metadata::metadata const& build() { return impl_->build(); }
|
thrift::metadata::metadata const& build() { return impl_->build(); }
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
@ -152,6 +169,7 @@ class metadata_builder {
|
|||||||
gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
|
gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
|
||||||
uint32_t num_inodes) = 0;
|
uint32_t num_inodes) = 0;
|
||||||
virtual void gather_global_entry_data(global_entry_data const& ge_data) = 0;
|
virtual void gather_global_entry_data(global_entry_data const& ge_data) = 0;
|
||||||
|
virtual void remap_blocks(std::span<block_mapping const> mapping) = 0;
|
||||||
|
|
||||||
virtual thrift::metadata::metadata const& build() = 0;
|
virtual thrift::metadata::metadata const& build() = 0;
|
||||||
};
|
};
|
||||||
|
@ -345,6 +345,11 @@ class filesystem_ final {
|
|||||||
return metadata_v2_utils(meta_).thaw_fs_options();
|
return metadata_v2_utils(meta_).thaw_fs_options();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::future<block_range>
|
||||||
|
read_raw_block_data(size_t block_no, size_t offset, size_t size) const {
|
||||||
|
return ir_.read_raw_block_data(block_no, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
filesystem_parser make_fs_parser() const {
|
filesystem_parser make_fs_parser() const {
|
||||||
return filesystem_parser(mm_, image_offset_, options_.image_size);
|
return filesystem_parser(mm_, image_offset_, options_.image_size);
|
||||||
@ -1387,6 +1392,10 @@ class filesystem_full_
|
|||||||
thawed_fs_options() const override {
|
thawed_fs_options() const override {
|
||||||
return fs().thawed_fs_options();
|
return fs().thawed_fs_options();
|
||||||
}
|
}
|
||||||
|
std::future<block_range> read_raw_block_data(size_t block_no, size_t offset,
|
||||||
|
size_t size) const override {
|
||||||
|
return fs().read_raw_block_data(block_no, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
history history_;
|
history history_;
|
||||||
@ -1519,6 +1528,12 @@ filesystem_v2::thawed_fs_options() const {
|
|||||||
return full_().thawed_fs_options();
|
return full_().thawed_fs_options();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::future<block_range>
|
||||||
|
filesystem_v2::read_raw_block_data(size_t block_no, size_t offset,
|
||||||
|
size_t size) const {
|
||||||
|
return full_().read_raw_block_data(block_no, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
auto filesystem_v2::full_() const -> impl const& { return this->as_<impl>(); }
|
auto filesystem_v2::full_() const -> impl const& { return this->as_<impl>(); }
|
||||||
|
|
||||||
} // namespace dwarfs::reader
|
} // namespace dwarfs::reader
|
||||||
|
@ -158,6 +158,9 @@ class inode_reader_ final : public inode_reader_v2::impl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::future<block_range> read_raw_block_data(size_t block_no, size_t offset,
|
||||||
|
size_t size) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using offset_cache_type =
|
using offset_cache_type =
|
||||||
basic_offset_cache<uint32_t, file_off_t, size_t,
|
basic_offset_cache<uint32_t, file_off_t, size_t,
|
||||||
@ -249,6 +252,13 @@ void inode_reader_<LoggerPolicy>::do_readahead(uint32_t inode,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
std::future<block_range>
|
||||||
|
inode_reader_<LoggerPolicy>::read_raw_block_data(size_t block_no, size_t offset,
|
||||||
|
size_t size) const {
|
||||||
|
return cache_.get(block_no, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
std::vector<std::future<block_range>>
|
std::vector<std::future<block_range>>
|
||||||
inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size,
|
inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size,
|
||||||
|
@ -21,8 +21,11 @@
|
|||||||
* SPDX-License-Identifier: GPL-3.0-only
|
* SPDX-License-Identifier: GPL-3.0-only
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <dwarfs/history.h>
|
#include <dwarfs/history.h>
|
||||||
#include <dwarfs/logger.h>
|
#include <dwarfs/logger.h>
|
||||||
|
#include <dwarfs/malloc_byte_buffer.h>
|
||||||
#include <dwarfs/reader/filesystem_v2.h>
|
#include <dwarfs/reader/filesystem_v2.h>
|
||||||
#include <dwarfs/util.h>
|
#include <dwarfs/util.h>
|
||||||
#include <dwarfs/utility/rewrite_options.h>
|
#include <dwarfs/utility/rewrite_options.h>
|
||||||
@ -38,6 +41,217 @@
|
|||||||
|
|
||||||
namespace dwarfs::utility {
|
namespace dwarfs::utility {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
In order to be able to change the block size, we need to first build a list
|
||||||
|
of all blocks, along with their categories *and* category-specific metadata.
|
||||||
|
Only blocks in the same category and with the same metadata are eligible for
|
||||||
|
merging. While category/metadata is mostly irrelevant for splitting, splitting
|
||||||
|
requires us to know the compression constraints (i.e. the granularity of the
|
||||||
|
data) so we can split at the correct boundaries.
|
||||||
|
|
||||||
|
Granularity also makes splitting/merging more complicated, as we potentially
|
||||||
|
cannot simply split a block because one of the new blocks would be larger than
|
||||||
|
the block size. In which case we must move the excess data to the next block,
|
||||||
|
and so on. Similarly, when merging blocks, we can potentially fill up the
|
||||||
|
block with data from the next block.
|
||||||
|
|
||||||
|
So, ultimately, we need to define for each block in the rewritten filesystem
|
||||||
|
image the chunks of which it is made up. This mapping will not only be used
|
||||||
|
to build the new blocks, but also to rebuild the metadata. In the metadata,
|
||||||
|
both the chunks *and* the chunk table must be updated, since individual chunks
|
||||||
|
can be either merged or split as well. If we want to be super accurate, we
|
||||||
|
would also need to update the inode size cache; but this would only be relevant
|
||||||
|
if we go from a really large block size to a really small one. Then again, it
|
||||||
|
shouldn't be too hard to update the cache. What we *definitely* need to update
|
||||||
|
is the `block_categories` as well as the `block_category_metadata` tables in
|
||||||
|
the metadata.
|
||||||
|
|
||||||
|
So, what we need:
|
||||||
|
|
||||||
|
- A list of all blocks, along with their categories and metadata
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct block_info {
|
||||||
|
size_t block{};
|
||||||
|
size_t uncompressed_size{};
|
||||||
|
std::optional<dwarfs::internal::fs_section> section;
|
||||||
|
std::optional<std::string> category_name;
|
||||||
|
std::optional<std::string> metadata;
|
||||||
|
std::optional<compression_constraints> constraints;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
- An algorithm for splitting/merging that outputs the new block positions
|
||||||
|
(numbers) and the chunks that make up each block
|
||||||
|
|
||||||
|
struct block_chunk { // see metadata_builder.h
|
||||||
|
size_t block;
|
||||||
|
size_t offset;
|
||||||
|
size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct new_block_mapping {
|
||||||
|
size_t block{};
|
||||||
|
size_t size{};
|
||||||
|
std::vector<dwarfs::writer::internal::block_chunk> chunks{};
|
||||||
|
std::optional<std::string> category_name;
|
||||||
|
std::optional<std::string> metadata;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
- The algorithm should be deterministic. It doesn't have to be reversible,
|
||||||
|
i.e. splitting then merging or merging then splitting doesn't have to
|
||||||
|
yield the same result (or even the original filesystem image). But when
|
||||||
|
splitting or merging, the result should *always* be the same given the
|
||||||
|
same input. That means we *could* actually consider grouping blocks by
|
||||||
|
category and metadata in the output.
|
||||||
|
|
||||||
|
TODO: Check if we've gone from a compression with constraints to one
|
||||||
|
without (i.e. granularity 3 -> 1) and want to go back to the
|
||||||
|
original compression *without* a block size change, that should
|
||||||
|
fail early.
|
||||||
|
|
||||||
|
We need two new features to support this:
|
||||||
|
|
||||||
|
- `filesystem_v2` must allow reading raw block data (i.e. not file-based).
|
||||||
|
That way, we can easily make use of the block cache while re-composing
|
||||||
|
the blocks.
|
||||||
|
|
||||||
|
- `filesystem_writer` must allow delayed reading of the block data. We
|
||||||
|
can hopefully refactor the `rewritten_fsblock` to support this.
|
||||||
|
|
||||||
|
How does the remapping process work in the metadata builder?
|
||||||
|
|
||||||
|
The chunk_table is just a list of the first chunk of each regular file
|
||||||
|
inode, plus a sentinel at the end. Basically, we need to traverse the
|
||||||
|
chunk_table and the chunks it references and build new versions of the
|
||||||
|
chunk_table and chunks using the new blocks.
|
||||||
|
|
||||||
|
To build a new chunk from an old chunk, we must be able to figure out which
|
||||||
|
new blocks an old block is mapped to. This is sort of the opposite of
|
||||||
|
`new_block_mapping`, where we have stored which chunks of old blocks
|
||||||
|
make up a new block. So we need a second mapping:
|
||||||
|
|
||||||
|
struct block_mapping { // see metadata_builder.h
|
||||||
|
size_t old_block;
|
||||||
|
std::vector<block_chunk> chunks;
|
||||||
|
};
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct rw_block_mappings {
|
||||||
|
std::vector<new_block_mapping> new_to_old;
|
||||||
|
std::vector<dwarfs::writer::internal::block_mapping> old_to_new;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Compute how the blocks of an existing image map onto blocks of a new size.
 *
 * Source blocks are first grouped into "streams": all blocks that share the
 * same (category name, category metadata) pair, in the order in which they
 * appear in `blocks`. Each stream is then treated as one contiguous run of
 * bytes and cut into target blocks of at most `block_size` bytes, rounded
 * down to a multiple of the stream's granularity. The granularity is taken
 * from the compression constraints of the stream's first block and defaults
 * to 1 if there are none.
 *
 * Streams are emitted in order of first appearance, so the result depends
 * only on the input.
 *
 * \param blocks      One entry per source block. Block numbers are used as
 *                    indices into this span, so `blocks[i].block` must be `i`.
 * \param block_size  Target block size in bytes.
 *
 * \return `new_to_old` lists, for every target block, the source chunks it is
 *         assembled from; `old_to_new` lists, for every source block (sorted
 *         by block number), the target chunks its data ends up in.
 */
rw_block_mappings build_block_mappings(std::span<block_info const> blocks,
                                       size_t const block_size) {
  using stream_id =
      std::pair<std::optional<std::string>, std::optional<std::string>>;
  std::vector<std::vector<size_t>> streams;
  std::map<stream_id, size_t> stream_map;

  // Group block numbers by (category, metadata), preserving input order.
  for (auto const& b : blocks) {
    stream_id id{b.category_name, b.metadata};
    auto [it, inserted] = stream_map.try_emplace(id, streams.size());
    if (inserted) {
      streams.emplace_back();
    }
    streams[it->second].push_back(b.block);
  }

  rw_block_mappings result;

  for (auto const& stream : streams) {
    size_t granularity{1};

    // A stream is never empty: it is created together with its first block.
    if (auto const& cc = blocks[stream.front()].constraints;
        cc && cc->granularity) {
      granularity = cc->granularity.value();
    }

    // Reject a zero granularity up front instead of dividing by zero below.
    DWARFS_CHECK(granularity > 0, "granularity must not be zero");

    // Largest multiple of the granularity that still fits into a block.
    size_t const max_stream_block_size{granularity *
                                       (block_size / granularity)};

    std::vector<new_block_mapping> mapped;

    for (size_t block : stream) {
      result.old_to_new.push_back({.old_block = block});
      auto& old_to_new = result.old_to_new.back();
      auto const& b = blocks[block];
      size_t offset{0};

      while (offset < b.uncompressed_size) {
        if (mapped.empty() || mapped.back().size == max_stream_block_size) {
          // If not even a single granule fits into a target block, every
          // chunk would have size zero and this loop would never terminate.
          DWARFS_CHECK(
              max_stream_block_size > 0,
              fmt::format("block size ({}) is smaller than granularity ({})",
                          block_size, granularity));

          // Target block numbers continue after those of earlier streams.
          mapped.push_back({.block = result.new_to_old.size() + mapped.size(),
                            .category_name = b.category_name,
                            .metadata = b.metadata});
        }

        auto& m = mapped.back();
        size_t const chunk_size{std::min(b.uncompressed_size - offset,
                                         max_stream_block_size - m.size)};

        DWARFS_CHECK(chunk_size % granularity == 0,
                     fmt::format("chunk_size ({}) % granularity ({}) != 0",
                                 chunk_size, granularity));

        // Record the same chunk in both directions of the mapping.
        old_to_new.chunks.push_back(
            {.block = m.block, .offset = m.size, .size = chunk_size});

        m.chunks.push_back(
            {.block = block, .offset = offset, .size = chunk_size});

        m.size += chunk_size;
        offset += chunk_size;
      }
    }

    std::ranges::move(mapped, std::back_inserter(result.new_to_old));
  }

  // Streams were processed one after another; restore source block order.
  std::ranges::sort(result.old_to_new, [](auto const& a, auto const& b) {
    return a.old_block < b.old_block;
  });

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Render both directions of a block mapping as multi-line text.
 *
 * The output first lists every new block (with its size and, if present, its
 * category and metadata) followed by the old-block chunks it is made of, and
 * then every old block followed by the new-block chunks it is mapped to.
 */
std::string block_mappings_to_string(rw_block_mappings const& mapped) {
  std::ostringstream text;

  // Both directions print their chunk lists identically, apart from the
  // label in front of the block number.
  auto const dump_chunks = [&text](auto const& chunks, char const* prefix) {
    for (auto const& chunk : chunks) {
      text << prefix << chunk.block;
      text << ", offset " << chunk.offset;
      text << ", size " << chunk.size << "\n";
    }
  };

  for (auto const& target : mapped.new_to_old) {
    text << "new block " << target.block;
    text << " (size " << target.size;
    if (target.category_name.has_value()) {
      text << ", category " << target.category_name.value();
    }
    if (target.metadata.has_value()) {
      text << ", metadata " << target.metadata.value();
    }
    text << "):\n";
    dump_chunks(target.chunks, " chunk: old block ");
  }

  for (auto const& source : mapped.old_to_new) {
    text << "old block " << source.old_block << ":\n";
    dump_chunks(source.chunks, " chunk: new block ");
  }

  return text.str();
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
||||||
dwarfs::writer::filesystem_writer& fs_writer,
|
dwarfs::writer::filesystem_writer& fs_writer,
|
||||||
dwarfs::writer::category_resolver const& cat_resolver,
|
dwarfs::writer::category_resolver const& cat_resolver,
|
||||||
@ -46,6 +260,18 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
|||||||
|
|
||||||
LOG_PROXY(debug_logger_policy, lgr);
|
LOG_PROXY(debug_logger_policy, lgr);
|
||||||
|
|
||||||
|
if (opts.change_block_size) {
|
||||||
|
DWARFS_CHECK(opts.recompress_block,
|
||||||
|
"change_block_size requires recompress_block");
|
||||||
|
DWARFS_CHECK(opts.recompress_metadata,
|
||||||
|
"change_block_size requires recompress_metadata");
|
||||||
|
DWARFS_CHECK(opts.rebuild_metadata,
|
||||||
|
"change_block_size requires rebuild_metadata");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<block_info> blocks;
|
||||||
|
rw_block_mappings mapped_blocks;
|
||||||
|
|
||||||
auto parser = fs.get_parser();
|
auto parser = fs.get_parser();
|
||||||
|
|
||||||
auto& writer = fs_writer.get_internal();
|
auto& writer = fs_writer.get_internal();
|
||||||
@ -54,20 +280,64 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
|||||||
size_t block_no{0};
|
size_t block_no{0};
|
||||||
parser->rewind();
|
parser->rewind();
|
||||||
|
|
||||||
while (auto s = parser->next_section()) {
|
{
|
||||||
if (s->type() == section_type::BLOCK) {
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
if (auto catstr = fs.get_block_category(block_no)) {
|
|
||||||
if (auto cat = cat_resolver.category_value(catstr.value())) {
|
while (auto s = parser->next_section()) {
|
||||||
writer.check_block_compression(s->compression(),
|
if (s->type() == section_type::BLOCK) {
|
||||||
parser->section_data(*s), cat);
|
dwarfs::writer::internal::block_compression_info bci;
|
||||||
|
auto catstr = fs.get_block_category(block_no);
|
||||||
|
std::optional<fragment_category::value_type> cat;
|
||||||
|
|
||||||
|
if (catstr) {
|
||||||
|
cat = cat_resolver.category_value(catstr.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
writer.check_block_compression(
|
||||||
|
s->compression(), parser->section_data(*s), cat,
|
||||||
|
opts.change_block_size ? &bci : nullptr);
|
||||||
|
|
||||||
|
if (opts.change_block_size) {
|
||||||
|
DWARFS_CHECK(block_no == blocks.size(),
|
||||||
|
fmt::format("block_no ({}) != blocks.size() ({})",
|
||||||
|
block_no, blocks.size()));
|
||||||
|
LOG_DEBUG << "adding block " << block_no
|
||||||
|
<< " uncompressed size: " << bci.uncompressed_size;
|
||||||
|
auto& info = blocks.emplace_back();
|
||||||
|
info.block = block_no;
|
||||||
|
info.uncompressed_size = bci.uncompressed_size;
|
||||||
|
info.section = s;
|
||||||
|
info.category_name = catstr;
|
||||||
|
info.metadata = bci.metadata;
|
||||||
|
info.constraints = bci.constraints;
|
||||||
|
}
|
||||||
|
|
||||||
|
++block_no;
|
||||||
}
|
}
|
||||||
++block_no;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tv << "checked compression for " << block_no << " blocks";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opts.change_block_size) {
|
||||||
|
{
|
||||||
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
|
|
||||||
|
mapped_blocks =
|
||||||
|
build_block_mappings(blocks, opts.change_block_size.value());
|
||||||
|
|
||||||
|
tv << "mapped " << blocks.size() << " source blocks to "
|
||||||
|
<< mapped_blocks.new_to_old.size() << " target blocks";
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG << block_mappings_to_string(mapped_blocks);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.configure_rewrite(parser->filesystem_size(), fs.num_blocks());
|
writer.configure_rewrite(parser->filesystem_size(),
|
||||||
|
opts.change_block_size
|
||||||
|
? mapped_blocks.new_to_old.size()
|
||||||
|
: fs.num_blocks());
|
||||||
|
|
||||||
if (auto header = parser->header()) {
|
if (auto header = parser->header()) {
|
||||||
writer.copy_header(*header);
|
writer.copy_header(*header);
|
||||||
@ -121,49 +391,77 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (opts.change_block_size) {
|
||||||
|
for (auto const& m : mapped_blocks.new_to_old) {
|
||||||
|
std::optional<fragment_category::value_type> cat;
|
||||||
|
|
||||||
|
if (m.category_name) {
|
||||||
|
cat = cat_resolver.category_value(m.category_name.value());
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.rewrite_block(
|
||||||
|
[&] {
|
||||||
|
auto data = malloc_byte_buffer::create_reserve(m.size);
|
||||||
|
for (auto const& c : m.chunks) {
|
||||||
|
auto range =
|
||||||
|
fs.read_raw_block_data(c.block, c.offset, c.size).get();
|
||||||
|
data.append(range.data(), range.size());
|
||||||
|
}
|
||||||
|
DWARFS_CHECK(data.size() == m.size,
|
||||||
|
fmt::format("data size {} != expected size {}",
|
||||||
|
data.size(), m.size));
|
||||||
|
return std::pair{data.share(), m.metadata};
|
||||||
|
},
|
||||||
|
m.size, cat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
parser->rewind();
|
parser->rewind();
|
||||||
|
|
||||||
while (auto s = parser->next_section()) {
|
while (auto s = parser->next_section()) {
|
||||||
switch (s->type()) {
|
switch (s->type()) {
|
||||||
case section_type::BLOCK: {
|
case section_type::BLOCK:
|
||||||
std::optional<fragment_category::value_type> cat;
|
if (!opts.change_block_size) {
|
||||||
bool recompress_block{opts.recompress_block};
|
std::optional<fragment_category::value_type> cat;
|
||||||
|
bool recompress_block{opts.recompress_block};
|
||||||
|
|
||||||
if (recompress_block) {
|
if (recompress_block) {
|
||||||
auto catstr = fs.get_block_category(block_no);
|
auto catstr = fs.get_block_category(block_no);
|
||||||
|
|
||||||
if (catstr) {
|
if (catstr) {
|
||||||
cat = cat_resolver.category_value(catstr.value());
|
cat = cat_resolver.category_value(catstr.value());
|
||||||
|
|
||||||
if (!cat) {
|
if (!cat) {
|
||||||
LOG_ERROR << "unknown category '" << catstr.value()
|
LOG_ERROR << "unknown category '" << catstr.value()
|
||||||
<< "' for block " << block_no;
|
<< "' for block " << block_no;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!opts.recompress_categories.empty()) {
|
if (!opts.recompress_categories.empty()) {
|
||||||
bool is_in_set{opts.recompress_categories.contains(catstr.value())};
|
bool is_in_set{
|
||||||
|
opts.recompress_categories.contains(catstr.value())};
|
||||||
|
|
||||||
recompress_block =
|
recompress_block =
|
||||||
opts.recompress_categories_exclude ? !is_in_set : is_in_set;
|
opts.recompress_categories_exclude ? !is_in_set : is_in_set;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (recompress_block && from_none_to_none(s, cat)) {
|
||||||
|
recompress_block = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (recompress_block) {
|
||||||
|
log_recompress(s, cat);
|
||||||
|
|
||||||
|
writer.write_section(section_type::BLOCK, s->compression(),
|
||||||
|
parser->section_data(*s), cat);
|
||||||
|
} else {
|
||||||
|
copy_compressed(s, cat);
|
||||||
|
}
|
||||||
|
|
||||||
|
++block_no;
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
if (recompress_block && from_none_to_none(s, cat)) {
|
|
||||||
recompress_block = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (recompress_block) {
|
|
||||||
log_recompress(s, cat);
|
|
||||||
|
|
||||||
writer.write_section(section_type::BLOCK, s->compression(),
|
|
||||||
parser->section_data(*s), cat);
|
|
||||||
} else {
|
|
||||||
copy_compressed(s, cat);
|
|
||||||
}
|
|
||||||
|
|
||||||
++block_no;
|
|
||||||
} break;
|
|
||||||
|
|
||||||
case section_type::METADATA_V2_SCHEMA:
|
case section_type::METADATA_V2_SCHEMA:
|
||||||
case section_type::METADATA_V2:
|
case section_type::METADATA_V2:
|
||||||
@ -178,6 +476,12 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
|||||||
auto builder =
|
auto builder =
|
||||||
metadata_builder(lgr, std::move(*md), fsopts.get(), fs.version(),
|
metadata_builder(lgr, std::move(*md), fsopts.get(), fs.version(),
|
||||||
opts.rebuild_metadata.value());
|
opts.rebuild_metadata.value());
|
||||||
|
|
||||||
|
if (opts.change_block_size) {
|
||||||
|
builder.set_block_size(opts.change_block_size.value());
|
||||||
|
builder.remap_blocks(mapped_blocks.old_to_new);
|
||||||
|
}
|
||||||
|
|
||||||
auto [schema, data] =
|
auto [schema, data] =
|
||||||
metadata_freezer(LOG_GET_LOGGER).freeze(builder.build());
|
metadata_freezer(LOG_GET_LOGGER).freeze(builder.build());
|
||||||
|
|
||||||
|
@ -133,7 +133,7 @@ class fsblock {
|
|||||||
std::shared_ptr<compression_progress> pctx);
|
std::shared_ptr<compression_progress> pctx);
|
||||||
|
|
||||||
fsblock(section_type type, block_compressor const& bc,
|
fsblock(section_type type, block_compressor const& bc,
|
||||||
std::span<uint8_t const> data, compression_type data_comp_type,
|
delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
std::shared_ptr<compression_progress> pctx);
|
std::shared_ptr<compression_progress> pctx);
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -371,26 +371,27 @@ class compressed_fsblock : public fsblock::impl {
|
|||||||
class rewritten_fsblock : public fsblock::impl {
|
class rewritten_fsblock : public fsblock::impl {
|
||||||
public:
|
public:
|
||||||
rewritten_fsblock(section_type type, block_compressor const& bc,
|
rewritten_fsblock(section_type type, block_compressor const& bc,
|
||||||
std::span<uint8_t const> data,
|
delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
compression_type data_comp_type,
|
|
||||||
std::shared_ptr<compression_progress> pctx)
|
std::shared_ptr<compression_progress> pctx)
|
||||||
: type_{type}
|
: type_{type}
|
||||||
, bc_{bc}
|
, bc_{bc}
|
||||||
, data_{data}
|
, data_{std::move(data)}
|
||||||
, comp_type_{bc_.type()}
|
, comp_type_{bc_.type()}
|
||||||
, pctx_{std::move(pctx)}
|
, pctx_{std::move(pctx)}
|
||||||
, data_comp_type_{data_comp_type} {
|
, uncompressed_size_{uncompressed_size} {
|
||||||
DWARFS_CHECK(bc_, "block_compressor must not be null");
|
DWARFS_CHECK(bc_, "block_compressor must not be null");
|
||||||
}
|
}
|
||||||
|
|
||||||
void compress(worker_group& wg, std::optional<std::string> meta) override {
|
void compress(worker_group& wg, std::optional<std::string> meta) override {
|
||||||
|
DWARFS_CHECK(!meta,
|
||||||
|
"metadata not supported for rewritten_fsblock::compress");
|
||||||
|
|
||||||
std::promise<void> prom;
|
std::promise<void> prom;
|
||||||
future_ = prom.get_future();
|
future_ = prom.get_future();
|
||||||
|
|
||||||
wg.add_job(
|
wg.add_job([this, prom = std::move(prom)]() mutable {
|
||||||
[this, prom = std::move(prom), meta = std::move(meta)]() mutable {
|
compress_job(std::move(prom));
|
||||||
compress_job(std::move(prom), std::move(meta));
|
});
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void wait_until_compressed() override { future_.get(); }
|
void wait_until_compressed() override { future_.get(); }
|
||||||
@ -406,7 +407,7 @@ class rewritten_fsblock : public fsblock::impl {
|
|||||||
return block_data_.value().span();
|
return block_data_.value().span();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t uncompressed_size() const override { return data_.size(); }
|
size_t uncompressed_size() const override { return uncompressed_size_; }
|
||||||
|
|
||||||
size_t size() const override {
|
size_t size() const override {
|
||||||
std::lock_guard lock(mx_);
|
std::lock_guard lock(mx_);
|
||||||
@ -444,31 +445,20 @@ class rewritten_fsblock : public fsblock::impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void compress_job(std::promise<void> prom, std::optional<std::string> meta) {
|
void compress_job(std::promise<void> prom) {
|
||||||
try {
|
try {
|
||||||
shared_byte_buffer block;
|
auto [block, meta] = data_();
|
||||||
|
|
||||||
{
|
pctx_->bytes_in += block.size(); // TODO: data_.size()?
|
||||||
// TODO: we don't have to do this for uncompressed blocks
|
|
||||||
block_decompressor bd(data_comp_type_, data_);
|
|
||||||
block = bd.start_decompression(malloc_byte_buffer::create());
|
|
||||||
bd.decompress_frame(bd.uncompressed_size());
|
|
||||||
|
|
||||||
if (!meta) {
|
try {
|
||||||
meta = bd.metadata();
|
if (meta) {
|
||||||
}
|
block = bc_.compress(block, *meta);
|
||||||
|
} else {
|
||||||
pctx_->bytes_in += block.size(); // TODO: data_.size()?
|
block = bc_.compress(block);
|
||||||
|
|
||||||
try {
|
|
||||||
if (meta) {
|
|
||||||
block = bc_.compress(block, *meta);
|
|
||||||
} else {
|
|
||||||
block = bc_.compress(block);
|
|
||||||
}
|
|
||||||
} catch (bad_compression_ratio_error const&) {
|
|
||||||
comp_type_ = compression_type::NONE;
|
|
||||||
}
|
}
|
||||||
|
} catch (bad_compression_ratio_error const&) {
|
||||||
|
comp_type_ = compression_type::NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
pctx_->bytes_out += block.size();
|
pctx_->bytes_out += block.size();
|
||||||
@ -487,14 +477,14 @@ class rewritten_fsblock : public fsblock::impl {
|
|||||||
section_type const type_;
|
section_type const type_;
|
||||||
block_compressor const& bc_;
|
block_compressor const& bc_;
|
||||||
mutable std::recursive_mutex mx_;
|
mutable std::recursive_mutex mx_;
|
||||||
std::span<uint8_t const> data_;
|
delayed_data_fn_type data_;
|
||||||
std::optional<shared_byte_buffer> block_data_;
|
std::optional<shared_byte_buffer> block_data_;
|
||||||
std::future<void> future_;
|
std::future<void> future_;
|
||||||
std::optional<uint32_t> number_;
|
std::optional<uint32_t> number_;
|
||||||
std::optional<section_header_v2> mutable header_;
|
std::optional<section_header_v2> mutable header_;
|
||||||
compression_type comp_type_;
|
compression_type comp_type_;
|
||||||
std::shared_ptr<compression_progress> pctx_;
|
std::shared_ptr<compression_progress> pctx_;
|
||||||
compression_type const data_comp_type_;
|
size_t const uncompressed_size_;
|
||||||
};
|
};
|
||||||
|
|
||||||
fsblock::fsblock(section_type type, block_compressor const& bc,
|
fsblock::fsblock(section_type type, block_compressor const& bc,
|
||||||
@ -515,10 +505,10 @@ fsblock::fsblock(fs_section sec, std::span<uint8_t const> data,
|
|||||||
std::move(pctx))) {}
|
std::move(pctx))) {}
|
||||||
|
|
||||||
fsblock::fsblock(section_type type, block_compressor const& bc,
|
fsblock::fsblock(section_type type, block_compressor const& bc,
|
||||||
std::span<uint8_t const> data, compression_type data_comp_type,
|
delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
std::shared_ptr<compression_progress> pctx)
|
std::shared_ptr<compression_progress> pctx)
|
||||||
: impl_(std::make_unique<rewritten_fsblock>(type, bc, data, data_comp_type,
|
: impl_(std::make_unique<rewritten_fsblock>(
|
||||||
std::move(pctx))) {}
|
type, bc, std::move(data), uncompressed_size, std::move(pctx))) {}
|
||||||
|
|
||||||
void fsblock::build_section_header(section_header_v2& sh,
|
void fsblock::build_section_header(section_header_v2& sh,
|
||||||
fsblock::impl const& fsb,
|
fsblock::impl const& fsb,
|
||||||
@ -601,12 +591,15 @@ class filesystem_writer_ final : public filesystem_writer_detail {
|
|||||||
void write_metadata_v2_schema(shared_byte_buffer data) override;
|
void write_metadata_v2_schema(shared_byte_buffer data) override;
|
||||||
void write_metadata_v2(shared_byte_buffer data) override;
|
void write_metadata_v2(shared_byte_buffer data) override;
|
||||||
void write_history(shared_byte_buffer data) override;
|
void write_history(shared_byte_buffer data) override;
|
||||||
void check_block_compression(
|
void check_block_compression(compression_type compression,
|
||||||
compression_type compression, std::span<uint8_t const> data,
|
std::span<uint8_t const> data,
|
||||||
std::optional<fragment_category::value_type> cat) override;
|
std::optional<fragment_category::value_type> cat,
|
||||||
|
block_compression_info* info) override;
|
||||||
void write_section(section_type type, compression_type compression,
|
void write_section(section_type type, compression_type compression,
|
||||||
std::span<uint8_t const> data,
|
std::span<uint8_t const> data,
|
||||||
std::optional<fragment_category::value_type> cat) override;
|
std::optional<fragment_category::value_type> cat) override;
|
||||||
|
void rewrite_block(delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
|
std::optional<fragment_category::value_type> cat) override;
|
||||||
void write_compressed_section(fs_section const& sec,
|
void write_compressed_section(fs_section const& sec,
|
||||||
std::span<uint8_t const> data) override;
|
std::span<uint8_t const> data) override;
|
||||||
void flush() override;
|
void flush() override;
|
||||||
@ -624,6 +617,10 @@ class filesystem_writer_ final : public filesystem_writer_detail {
|
|||||||
write_block_impl(fragment_category cat, shared_byte_buffer data,
|
write_block_impl(fragment_category cat, shared_byte_buffer data,
|
||||||
block_compressor const& bc, std::optional<std::string> meta,
|
block_compressor const& bc, std::optional<std::string> meta,
|
||||||
physical_block_cb_type physical_block_cb);
|
physical_block_cb_type physical_block_cb);
|
||||||
|
void
|
||||||
|
write_section_delayed_data(section_type type, delayed_data_fn_type data,
|
||||||
|
size_t uncompressed_size,
|
||||||
|
std::optional<fragment_category::value_type> cat);
|
||||||
void on_block_merged(block_holder_type holder);
|
void on_block_merged(block_holder_type holder);
|
||||||
void write_section_impl(section_type type, shared_byte_buffer data);
|
void write_section_impl(section_type type, shared_byte_buffer data);
|
||||||
void write(fsblock const& fsb);
|
void write(fsblock const& fsb);
|
||||||
@ -888,7 +885,8 @@ void filesystem_writer_<LoggerPolicy>::write_section_impl(
|
|||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void filesystem_writer_<LoggerPolicy>::check_block_compression(
|
void filesystem_writer_<LoggerPolicy>::check_block_compression(
|
||||||
compression_type compression, std::span<uint8_t const> data,
|
compression_type compression, std::span<uint8_t const> data,
|
||||||
std::optional<fragment_category::value_type> cat) {
|
std::optional<fragment_category::value_type> cat,
|
||||||
|
block_compression_info* info) {
|
||||||
block_compressor const* bc{nullptr};
|
block_compressor const* bc{nullptr};
|
||||||
|
|
||||||
if (cat) {
|
if (cat) {
|
||||||
@ -897,11 +895,11 @@ void filesystem_writer_<LoggerPolicy>::check_block_compression(
|
|||||||
bc = &default_bc_.value();
|
bc = &default_bc_.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
block_decompressor bd(compression, data);
|
||||||
|
|
||||||
if (auto reqstr = bc->metadata_requirements(); !reqstr.empty()) {
|
if (auto reqstr = bc->metadata_requirements(); !reqstr.empty()) {
|
||||||
auto req = compression_metadata_requirements<nlohmann::json>{reqstr};
|
auto req = compression_metadata_requirements<nlohmann::json>{reqstr};
|
||||||
|
|
||||||
block_decompressor bd(compression, data);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
req.check(bd.metadata());
|
req.check(bd.metadata());
|
||||||
} catch (std::exception const& e) {
|
} catch (std::exception const& e) {
|
||||||
@ -912,12 +910,19 @@ void filesystem_writer_<LoggerPolicy>::check_block_compression(
|
|||||||
DWARFS_THROW(runtime_error, msg);
|
DWARFS_THROW(runtime_error, msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (info) {
|
||||||
|
info->uncompressed_size = bd.uncompressed_size();
|
||||||
|
info->metadata = bd.metadata();
|
||||||
|
if (info->metadata) {
|
||||||
|
info->constraints = bc->get_compression_constraints(*info->metadata);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void filesystem_writer_<LoggerPolicy>::write_section(
|
void filesystem_writer_<LoggerPolicy>::write_section_delayed_data(
|
||||||
section_type type, compression_type compression,
|
section_type type, delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
std::span<uint8_t const> data,
|
|
||||||
std::optional<fragment_category::value_type> cat) {
|
std::optional<fragment_category::value_type> cat) {
|
||||||
{
|
{
|
||||||
std::unique_lock lock(mx_);
|
std::unique_lock lock(mx_);
|
||||||
@ -933,7 +938,8 @@ void filesystem_writer_<LoggerPolicy>::write_section(
|
|||||||
|
|
||||||
auto& bc = get_compressor(type, cat);
|
auto& bc = get_compressor(type, cat);
|
||||||
|
|
||||||
auto fsb = std::make_unique<fsblock>(type, bc, data, compression, pctx_);
|
auto fsb = std::make_unique<fsblock>(type, bc, std::move(data),
|
||||||
|
uncompressed_size, pctx_);
|
||||||
|
|
||||||
fsb->set_block_no(section_number_++);
|
fsb->set_block_no(section_number_++);
|
||||||
fsb->compress(wg_);
|
fsb->compress(wg_);
|
||||||
@ -944,6 +950,32 @@ void filesystem_writer_<LoggerPolicy>::write_section(
|
|||||||
cond_.notify_one();
|
cond_.notify_one();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void filesystem_writer_<LoggerPolicy>::write_section(
|
||||||
|
section_type type, compression_type compression,
|
||||||
|
std::span<uint8_t const> data,
|
||||||
|
std::optional<fragment_category::value_type> cat) {
|
||||||
|
auto bd = block_decompressor(compression, data);
|
||||||
|
auto uncompressed_size = bd.uncompressed_size();
|
||||||
|
|
||||||
|
write_section_delayed_data(
|
||||||
|
type,
|
||||||
|
[bd = std::move(bd)]() mutable {
|
||||||
|
auto block = bd.start_decompression(malloc_byte_buffer::create());
|
||||||
|
bd.decompress_frame(bd.uncompressed_size());
|
||||||
|
return std::pair{std::move(block), bd.metadata()};
|
||||||
|
},
|
||||||
|
uncompressed_size, cat);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void filesystem_writer_<LoggerPolicy>::rewrite_block(
|
||||||
|
delayed_data_fn_type data, size_t uncompressed_size,
|
||||||
|
std::optional<fragment_category::value_type> cat) {
|
||||||
|
write_section_delayed_data(section_type::BLOCK, std::move(data),
|
||||||
|
uncompressed_size, cat);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void filesystem_writer_<LoggerPolicy>::write_compressed_section(
|
void filesystem_writer_<LoggerPolicy>::write_compressed_section(
|
||||||
fs_section const& sec, std::span<uint8_t const> data) {
|
fs_section const& sec, std::span<uint8_t const> data) {
|
||||||
|
@ -135,6 +135,7 @@ class metadata_builder_ final : public metadata_builder::impl {
|
|||||||
uint32_t num_inodes) override;
|
uint32_t num_inodes) override;
|
||||||
|
|
||||||
void gather_global_entry_data(global_entry_data const& ge_data) override;
|
void gather_global_entry_data(global_entry_data const& ge_data) override;
|
||||||
|
void remap_blocks(std::span<block_mapping const> mapping) override;
|
||||||
|
|
||||||
thrift::metadata::metadata const& build() override;
|
thrift::metadata::metadata const& build() override;
|
||||||
|
|
||||||
@ -269,6 +270,103 @@ void metadata_builder_<LoggerPolicy>::gather_global_entry_data(
|
|||||||
md_.timestamp_base() = ge_data.get_timestamp_base();
|
md_.timestamp_base() = ge_data.get_timestamp_base();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void metadata_builder_<LoggerPolicy>::remap_blocks(
|
||||||
|
std::span<block_mapping const> mapping) {
|
||||||
|
using chunks_t = typename decltype(md_.chunks())::value_type;
|
||||||
|
using chunk_table_t = typename decltype(md_.chunk_table())::value_type;
|
||||||
|
using categories_t = typename decltype(md_.block_categories())::value_type;
|
||||||
|
using category_metadata_t =
|
||||||
|
typename decltype(md_.block_category_metadata())::value_type;
|
||||||
|
|
||||||
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
|
|
||||||
|
std::span<typename chunks_t::value_type> old_chunks = md_.chunks().value();
|
||||||
|
std::span<typename chunk_table_t::value_type> old_chunk_table =
|
||||||
|
md_.chunk_table().value();
|
||||||
|
|
||||||
|
DWARFS_CHECK(!old_chunk_table.empty(), "chunk table must not be empty");
|
||||||
|
|
||||||
|
chunks_t new_chunks;
|
||||||
|
chunk_table_t new_chunk_table;
|
||||||
|
|
||||||
|
new_chunk_table.push_back(0);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < old_chunk_table.size() - 1; ++i) {
|
||||||
|
auto chunks = old_chunks.subspan(
|
||||||
|
old_chunk_table[i], old_chunk_table[i + 1] - old_chunk_table[i]);
|
||||||
|
|
||||||
|
std::vector<block_chunk> mapped_chunks;
|
||||||
|
|
||||||
|
for (auto const& chunk : chunks) {
|
||||||
|
DWARFS_CHECK(chunk.block().value() < mapping.size(),
|
||||||
|
"chunk block out of range");
|
||||||
|
auto mapped = mapping[chunk.block().value()].map_chunk(
|
||||||
|
chunk.offset().value(), chunk.size().value());
|
||||||
|
DWARFS_CHECK(!mapped.empty(), "mapped chunk list is empty");
|
||||||
|
|
||||||
|
auto first = mapped.begin();
|
||||||
|
|
||||||
|
if (!mapped_chunks.empty() &&
|
||||||
|
mapped_chunks.back().block == mapped.front().block &&
|
||||||
|
mapped_chunks.back().offset + mapped_chunks.back().size ==
|
||||||
|
mapped.front().offset) {
|
||||||
|
// merge with previous chunk
|
||||||
|
mapped_chunks.back().size += mapped.front().size;
|
||||||
|
++first;
|
||||||
|
}
|
||||||
|
|
||||||
|
mapped_chunks.insert(mapped_chunks.end(), first, mapped.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const& chunk : mapped_chunks) {
|
||||||
|
auto& nc = new_chunks.emplace_back();
|
||||||
|
nc.block() = chunk.block;
|
||||||
|
nc.offset() = chunk.offset;
|
||||||
|
nc.size() = chunk.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_chunk_table.push_back(new_chunks.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const& old_categories = md_.block_categories();
|
||||||
|
auto const& old_category_metadata = md_.block_category_metadata();
|
||||||
|
|
||||||
|
if (old_categories.has_value() || old_category_metadata.has_value()) {
|
||||||
|
std::unordered_map<uint32_t, uint32_t> block_map;
|
||||||
|
for (auto const& m : mapping) {
|
||||||
|
for (auto const& c : m.chunks) {
|
||||||
|
block_map[c.block] = m.old_block;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (old_categories.has_value()) {
|
||||||
|
categories_t new_categories;
|
||||||
|
new_categories.resize(block_map.size());
|
||||||
|
for (auto const& [new_block, old_block] : block_map) {
|
||||||
|
new_categories[new_block] = old_categories.value().at(old_block);
|
||||||
|
}
|
||||||
|
md_.block_categories() = std::move(new_categories);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (old_category_metadata.has_value()) {
|
||||||
|
category_metadata_t new_category_metadata;
|
||||||
|
for (auto const& [new_block, old_block] : block_map) {
|
||||||
|
auto it = old_category_metadata.value().find(old_block);
|
||||||
|
if (it != old_category_metadata.value().end()) {
|
||||||
|
new_category_metadata[new_block] = it->second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
md_.block_category_metadata() = std::move(new_category_metadata);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
md_.chunks() = std::move(new_chunks);
|
||||||
|
md_.chunk_table() = std::move(new_chunk_table);
|
||||||
|
|
||||||
|
tv << "remapping blocks...";
|
||||||
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void metadata_builder_<LoggerPolicy>::update_inodes() {
|
void metadata_builder_<LoggerPolicy>::update_inodes() {
|
||||||
bool const update_uid{options_.uid.has_value()};
|
bool const update_uid{options_.uid.has_value()};
|
||||||
@ -680,6 +778,33 @@ void metadata_builder_<LoggerPolicy>::upgrade_metadata(
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
std::vector<block_chunk>
|
||||||
|
block_mapping::map_chunk(size_t offset, size_t size) const {
|
||||||
|
std::vector<block_chunk> mapped;
|
||||||
|
|
||||||
|
size_t pos{0};
|
||||||
|
|
||||||
|
for (auto const& chunk : chunks) {
|
||||||
|
if (pos + chunk.size > offset) {
|
||||||
|
auto mapped_offset = offset - pos;
|
||||||
|
auto mapped_size = std::min(size, chunk.size - mapped_offset);
|
||||||
|
mapped.push_back(
|
||||||
|
{chunk.block, chunk.offset + mapped_offset, mapped_size});
|
||||||
|
size -= mapped_size;
|
||||||
|
if (size == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offset += mapped_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
pos += chunk.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
DWARFS_CHECK(size == 0, "failed to map chunk, size mismatch");
|
||||||
|
|
||||||
|
return mapped;
|
||||||
|
}
|
||||||
|
|
||||||
metadata_builder::metadata_builder(logger& lgr, metadata_options const& options)
|
metadata_builder::metadata_builder(logger& lgr, metadata_options const& options)
|
||||||
: impl_{
|
: impl_{
|
||||||
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
|
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
|
||||||
|
@ -420,7 +420,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
bool no_progress = false, remove_header = false, no_section_index = false,
|
bool no_progress = false, remove_header = false, no_section_index = false,
|
||||||
force_overwrite = false, no_history = false,
|
force_overwrite = false, no_history = false,
|
||||||
no_history_timestamps = false, no_history_command_line = false,
|
no_history_timestamps = false, no_history_command_line = false,
|
||||||
rebuild_metadata = false;
|
rebuild_metadata = false, change_block_size = false;
|
||||||
unsigned level;
|
unsigned level;
|
||||||
int compress_niceness;
|
int compress_niceness;
|
||||||
uint16_t uid, gid;
|
uint16_t uid, gid;
|
||||||
@ -531,6 +531,9 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
("rebuild-metadata",
|
("rebuild-metadata",
|
||||||
po::value<bool>(&rebuild_metadata)->zero_tokens(),
|
po::value<bool>(&rebuild_metadata)->zero_tokens(),
|
||||||
"fully rebuild metadata")
|
"fully rebuild metadata")
|
||||||
|
("change-block-size",
|
||||||
|
po::value<bool>(&change_block_size)->zero_tokens(),
|
||||||
|
"change block size when recompressing")
|
||||||
("recompress-categories",
|
("recompress-categories",
|
||||||
po::value<std::string>(&recompress_categories),
|
po::value<std::string>(&recompress_categories),
|
||||||
"only recompress blocks of these categories")
|
"only recompress blocks of these categories")
|
||||||
@ -890,7 +893,8 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
|
|
||||||
path = iol.os->canonical(path);
|
path = iol.os->canonical(path);
|
||||||
|
|
||||||
bool recompress = vm.contains("recompress") || rebuild_metadata;
|
bool recompress =
|
||||||
|
vm.contains("recompress") || rebuild_metadata || change_block_size;
|
||||||
utility::rewrite_options rw_opts;
|
utility::rewrite_options rw_opts;
|
||||||
if (recompress) {
|
if (recompress) {
|
||||||
std::unordered_map<std::string, unsigned> const modes{
|
std::unordered_map<std::string, unsigned> const modes{
|
||||||
@ -900,8 +904,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
{"none", 0},
|
{"none", 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
if (recompress_opts.empty() && rebuild_metadata) {
|
if (recompress_opts.empty()) {
|
||||||
recompress_opts = "metadata";
|
if (change_block_size) {
|
||||||
|
recompress_opts = "all";
|
||||||
|
} else if (rebuild_metadata) {
|
||||||
|
recompress_opts = "metadata";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto it = modes.find(recompress_opts); it != modes.end()) {
|
if (auto it = modes.find(recompress_opts); it != modes.end()) {
|
||||||
@ -913,6 +921,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!recompress_categories.empty()) {
|
if (!recompress_categories.empty()) {
|
||||||
|
if (change_block_size) {
|
||||||
|
iol.err
|
||||||
|
<< "cannot use --recompress-categories with --change-block-size\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
std::string_view input = recompress_categories;
|
std::string_view input = recompress_categories;
|
||||||
if (input.front() == '!') {
|
if (input.front() == '!') {
|
||||||
rw_opts.recompress_categories_exclude = true;
|
rw_opts.recompress_categories_exclude = true;
|
||||||
@ -1412,9 +1426,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
if (recompress) {
|
if (recompress) {
|
||||||
if (rebuild_metadata) {
|
if (rebuild_metadata || change_block_size) {
|
||||||
rw_opts.rebuild_metadata = options.metadata;
|
rw_opts.rebuild_metadata = options.metadata;
|
||||||
}
|
}
|
||||||
|
if (change_block_size) {
|
||||||
|
rw_opts.change_block_size = UINT64_C(1) << sf_config.block_size_bits;
|
||||||
|
}
|
||||||
utility::rewrite_filesystem(lgr, *input_filesystem, *fsw, *cat_resolver,
|
utility::rewrite_filesystem(lgr, *input_filesystem, *fsw, *cat_resolver,
|
||||||
rw_opts);
|
rw_opts);
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user