mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-12 05:49:56 -04:00
Better modeling of metadata requirements
This commit is contained in:
parent
e08faf2c0c
commit
9d5969adb7
@ -443,6 +443,12 @@ add_library(dwarfs_compression ${LIBDWARFS_COMPRESSION_SRC})
|
||||
add_library(dwarfs_categorizer ${LIBDWARFS_CATEGORIZER_SRC})
|
||||
add_library(dwarfs_tool src/dwarfs/tool.cpp)
|
||||
|
||||
add_library(dwarfs_compression_metadata src/dwarfs/compression_metadata_requirements.cpp)
|
||||
|
||||
target_link_libraries(dwarfs_compression_metadata folly)
|
||||
target_link_libraries(dwarfs_categorizer dwarfs_compression_metadata)
|
||||
target_link_libraries(dwarfs dwarfs_compression_metadata)
|
||||
|
||||
if(DWARFS_GIT_BUILD)
|
||||
target_include_directories(dwarfs PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||
endif()
|
||||
@ -810,7 +816,8 @@ target_link_libraries(metadata_thrift thrift_light)
|
||||
target_link_libraries(compression_thrift thrift_light)
|
||||
|
||||
foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
|
||||
dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGETS})
|
||||
dwarfs_compression_metadata dwarfs_tool
|
||||
${BINARY_TARGETS} ${MAIN_TARGETS})
|
||||
target_include_directories(
|
||||
${tgt} SYSTEM
|
||||
PRIVATE ${Boost_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS} ${INCLUDE_DIRS}
|
||||
|
@ -33,8 +33,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/dynamic.h>
|
||||
|
||||
#include "dwarfs/compression.h"
|
||||
|
||||
namespace dwarfs {
|
||||
@ -57,22 +55,30 @@ class block_compressor {
|
||||
block_compressor(block_compressor&& bc) = default;
|
||||
block_compressor& operator=(block_compressor&& rhs) = default;
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t> const& data, folly::dynamic meta) const {
|
||||
return impl_->compress(data, std::move(meta));
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t> const& data) const {
|
||||
return impl_->compress(data, nullptr);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data) const {
|
||||
return impl_->compress(std::move(data), nullptr);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t> const& data,
|
||||
std::string const& metadata) const {
|
||||
return impl_->compress(data, &metadata);
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const {
|
||||
return impl_->compress(std::move(data), std::move(meta));
|
||||
compress(std::vector<uint8_t>&& data, std::string const& metadata) const {
|
||||
return impl_->compress(std::move(data), &metadata);
|
||||
}
|
||||
|
||||
compression_type type() const { return impl_->type(); }
|
||||
|
||||
std::string describe() const { return impl_->describe(); }
|
||||
|
||||
bool check_metadata(folly::dynamic meta) const {
|
||||
return impl_->check_metadata(std::move(meta));
|
||||
std::string metadata_requirements() const {
|
||||
return impl_->metadata_requirements();
|
||||
}
|
||||
|
||||
class impl {
|
||||
@ -82,14 +88,16 @@ class block_compressor {
|
||||
virtual std::unique_ptr<impl> clone() const = 0;
|
||||
|
||||
virtual std::vector<uint8_t>
|
||||
compress(const std::vector<uint8_t>& data, folly::dynamic meta) const = 0;
|
||||
compress(const std::vector<uint8_t>& data,
|
||||
std::string const* metadata) const = 0;
|
||||
virtual std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const = 0;
|
||||
compress(std::vector<uint8_t>&& data,
|
||||
std::string const* metadata) const = 0;
|
||||
|
||||
virtual compression_type type() const = 0;
|
||||
virtual std::string describe() const = 0;
|
||||
|
||||
virtual bool check_metadata(folly::dynamic meta) const = 0;
|
||||
virtual std::string metadata_requirements() const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -31,8 +31,6 @@
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
|
||||
#include <folly/dynamic.h>
|
||||
|
||||
#include "dwarfs/inode_fragments.h"
|
||||
|
||||
namespace boost::program_options {
|
||||
@ -53,9 +51,10 @@ class categorizer {
|
||||
|
||||
virtual std::span<std::string_view const> categories() const = 0;
|
||||
virtual bool is_single_fragment() const = 0;
|
||||
virtual folly::dynamic
|
||||
category_metadata(std::string_view category_name,
|
||||
std::optional<fragment_category> c) const = 0;
|
||||
virtual std::string
|
||||
category_metadata(std::string_view category_name, fragment_category c) const;
|
||||
virtual void set_metadata_requirements(std::string_view category_name,
|
||||
std::string requirements);
|
||||
};
|
||||
|
||||
class random_access_categorizer : public categorizer {
|
||||
@ -128,7 +127,7 @@ class categorizer_manager {
|
||||
|
||||
static fragment_category default_category();
|
||||
|
||||
void add(std::shared_ptr<categorizer const> c) { impl_->add(std::move(c)); }
|
||||
void add(std::shared_ptr<categorizer> c) { impl_->add(std::move(c)); }
|
||||
|
||||
categorizer_job job(std::filesystem::path const& path) const {
|
||||
return impl_->job(path);
|
||||
@ -143,28 +142,28 @@ class categorizer_manager {
|
||||
return impl_->category_value(name);
|
||||
}
|
||||
|
||||
folly::dynamic category_metadata(fragment_category c) const {
|
||||
std::string category_metadata(fragment_category c) const {
|
||||
return impl_->category_metadata(c);
|
||||
}
|
||||
|
||||
folly::dynamic
|
||||
category_metadata_sample(fragment_category::value_type c) const {
|
||||
return impl_->category_metadata_sample(c);
|
||||
void
|
||||
set_metadata_requirements(fragment_category::value_type c, std::string req) {
|
||||
impl_->set_metadata_requirements(c, std::move(req));
|
||||
}
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void add(std::shared_ptr<categorizer const> c) = 0;
|
||||
virtual void add(std::shared_ptr<categorizer> c) = 0;
|
||||
virtual categorizer_job job(std::filesystem::path const& path) const = 0;
|
||||
virtual std::string_view
|
||||
category_name(fragment_category::value_type c) const = 0;
|
||||
virtual std::optional<fragment_category::value_type>
|
||||
category_value(std::string_view name) const = 0;
|
||||
virtual folly::dynamic category_metadata(fragment_category c) const = 0;
|
||||
virtual folly::dynamic
|
||||
category_metadata_sample(fragment_category::value_type c) const = 0;
|
||||
virtual std::string category_metadata(fragment_category c) const = 0;
|
||||
virtual void set_metadata_requirements(fragment_category::value_type c,
|
||||
std::string req) = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
291
include/dwarfs/compression_metadata_requirements.h
Normal file
291
include/dwarfs/compression_metadata_requirements.h
Normal file
@ -0,0 +1,291 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <vector>

#include <fmt/format.h>

#include <folly/dynamic.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <typename T>
|
||||
std::optional<T> value_parser(folly::dynamic const& v) {
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
return v.asString();
|
||||
} else {
|
||||
static_assert(std::is_integral_v<T>);
|
||||
return v.asInt();
|
||||
}
|
||||
}
|
||||
|
||||
void check_dynamic_common(folly::dynamic const& dyn,
|
||||
std::string_view expected_type, size_t expected_size,
|
||||
std::string_view name);
|
||||
|
||||
void check_unsupported_metadata_requirements(folly::dynamic& req);
|
||||
|
||||
template <typename T, typename ValueParser>
|
||||
bool parse_metadata_requirements_set(T& container, folly::dynamic& req,
|
||||
std::string_view name,
|
||||
ValueParser const& value_parser) {
|
||||
if (auto it = req.find(name); it != req.items().end()) {
|
||||
detail::check_dynamic_common(it->second, "set", 2, name);
|
||||
|
||||
if (it->second[1].type() != folly::dynamic::ARRAY) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("non-array type argument for requirement '{}'", name));
|
||||
}
|
||||
|
||||
for (auto v : it->second[1]) {
|
||||
if (auto maybe_value = value_parser(v)) {
|
||||
if (!container.emplace(*maybe_value).second) {
|
||||
throw std::runtime_error(fmt::format(
|
||||
"duplicate value '{}' for requirement '{}'", v.asString(), name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
req.erase(it);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename T, typename ValueParser>
|
||||
bool parse_metadata_requirements_range(T& min, T& max, folly::dynamic& req,
|
||||
std::string_view name,
|
||||
ValueParser const& value_parser) {
|
||||
if (auto it = req.find(name); it != req.items().end()) {
|
||||
detail::check_dynamic_common(it->second, "range", 3, name);
|
||||
|
||||
auto get_value = [&](std::string_view what, int index) {
|
||||
if (auto maybe_value = value_parser(it->second[index])) {
|
||||
return *maybe_value;
|
||||
}
|
||||
throw std::runtime_error(
|
||||
fmt::format("could not parse {} value '{}' for requirement '{}'",
|
||||
what, it->second[index].asString(), name));
|
||||
};
|
||||
|
||||
min = get_value("minimum", 1);
|
||||
max = get_value("maximum", 2);
|
||||
|
||||
if (min > max) {
|
||||
throw std::runtime_error(fmt::format(
|
||||
"expected minimum '{}' to be less than or equal "
|
||||
"to maximum '{}' for requirement '{}'",
|
||||
it->second[1].asString(), it->second[2].asString(), name));
|
||||
}
|
||||
|
||||
req.erase(it);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
class metadata_requirement_base {
|
||||
public:
|
||||
virtual ~metadata_requirement_base() = default;
|
||||
|
||||
metadata_requirement_base(std::string const& name)
|
||||
: name_{name} {}
|
||||
|
||||
virtual void parse(folly::dynamic& req) = 0;
|
||||
|
||||
std::string_view name() const { return name_; }
|
||||
|
||||
private:
|
||||
std::string const name_;
|
||||
};
|
||||
|
||||
template <typename Meta>
|
||||
class checked_metadata_requirement_base : public metadata_requirement_base {
|
||||
public:
|
||||
using metadata_requirement_base::metadata_requirement_base;
|
||||
|
||||
virtual void check(Meta const& m) const = 0;
|
||||
};
|
||||
|
||||
template <typename Meta, typename T, typename U>
|
||||
class typed_metadata_requirement_base
|
||||
: public checked_metadata_requirement_base<Meta> {
|
||||
public:
|
||||
using value_parser_type =
|
||||
std::function<std::optional<T>(folly::dynamic const& v)>;
|
||||
using member_ptr_type = U(Meta::*);
|
||||
|
||||
typed_metadata_requirement_base(std::string const& name, member_ptr_type mp)
|
||||
: checked_metadata_requirement_base<Meta>(name)
|
||||
, mp_{mp}
|
||||
, value_parser_{detail::value_parser<T>} {}
|
||||
|
||||
typed_metadata_requirement_base(std::string const& name, member_ptr_type mp,
|
||||
value_parser_type value_parser)
|
||||
: checked_metadata_requirement_base<Meta>(name)
|
||||
, mp_{mp}
|
||||
, value_parser_{value_parser} {}
|
||||
|
||||
void check(Meta const& m) const override { check_value(m.*mp_); }
|
||||
|
||||
value_parser_type const& value_parser() const { return value_parser_; }
|
||||
|
||||
protected:
|
||||
virtual void check_value(T const& value) const = 0;
|
||||
|
||||
private:
|
||||
member_ptr_type mp_;
|
||||
value_parser_type value_parser_;
|
||||
};
|
||||
|
||||
template <typename Meta, typename T, typename U = T>
|
||||
class metadata_requirement_set
|
||||
: public typed_metadata_requirement_base<Meta, T, U> {
|
||||
public:
|
||||
using typed_metadata_requirement_base<Meta, T,
|
||||
U>::typed_metadata_requirement_base;
|
||||
|
||||
void parse(folly::dynamic& req) override {
|
||||
set_.reset();
|
||||
std::unordered_set<T> tmp;
|
||||
if (parse_metadata_requirements_set(tmp, req, this->name(),
|
||||
this->value_parser())) {
|
||||
set_.emplace(std::move(tmp));
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void check_value(T const& value) const override {
|
||||
if (set_ && set_->count(value) == 0) {
|
||||
throw std::range_error(fmt::format("{} '{}' does not meet requirements",
|
||||
this->name(), value));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::optional<std::unordered_set<T>> set_;
|
||||
};
|
||||
|
||||
template <typename Meta, typename T, typename U = T>
|
||||
class metadata_requirement_range
|
||||
: public typed_metadata_requirement_base<Meta, T, U> {
|
||||
public:
|
||||
using typed_metadata_requirement_base<Meta, T,
|
||||
U>::typed_metadata_requirement_base;
|
||||
|
||||
void parse(folly::dynamic& req) override {
|
||||
range_.reset();
|
||||
T min, max;
|
||||
if (parse_metadata_requirements_range(min, max, req, this->name(),
|
||||
this->value_parser())) {
|
||||
range_.emplace(min, max);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void check_value(T const& value) const override {
|
||||
if (range_ && (value < range_->first || value > range_->second)) {
|
||||
throw std::range_error(
|
||||
fmt::format("{} '{}' does not meet requirements [{}..{}]",
|
||||
this->name(), value, range_->first, range_->second));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::optional<std::pair<T, T>> range_;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template <typename Meta = void>
|
||||
class compression_metadata_requirements {
|
||||
public:
|
||||
compression_metadata_requirements() = default;
|
||||
|
||||
template <
|
||||
typename F, typename U,
|
||||
typename T = typename std::invoke_result_t<F, folly::dynamic>::value_type>
|
||||
void add_set(std::string const& name, U(Meta::*mp), F&& value_parser) {
|
||||
req_.emplace_back(
|
||||
std::make_unique<detail::metadata_requirement_set<Meta, T, U>>(
|
||||
name, mp, std::forward<F>(value_parser)));
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
void add_set(std::string const& name, U(Meta::*mp)) {
|
||||
add_set(name, mp, detail::value_parser<T>);
|
||||
}
|
||||
|
||||
template <
|
||||
typename F, typename U,
|
||||
typename T = typename std::invoke_result_t<F, folly::dynamic>::value_type>
|
||||
void add_range(std::string const& name, U(Meta::*mp), F&& value_parser) {
|
||||
req_.emplace_back(
|
||||
std::make_unique<detail::metadata_requirement_range<Meta, T, U>>(
|
||||
name, mp, std::forward<F>(value_parser)));
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
void add_range(std::string const& name, U(Meta::*mp)) {
|
||||
add_range(name, mp, detail::value_parser<T>);
|
||||
}
|
||||
|
||||
void parse(folly::dynamic req) const {
|
||||
for (auto const& r : req_) {
|
||||
r->parse(req);
|
||||
}
|
||||
|
||||
detail::check_unsupported_metadata_requirements(req);
|
||||
}
|
||||
|
||||
void check(Meta const& meta) const {
|
||||
for (auto const& r : req_) {
|
||||
r->check(meta);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<detail::checked_metadata_requirement_base<Meta>>>
|
||||
req_;
|
||||
};
|
||||
|
||||
template <>
|
||||
class compression_metadata_requirements<void> {
|
||||
public:
|
||||
void parse(folly::dynamic req) const {
|
||||
detail::check_unsupported_metadata_requirements(req);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
@ -26,10 +26,13 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/String.h>
|
||||
#include <folly/container/Enumerate.h>
|
||||
#include <folly/json.h>
|
||||
|
||||
#include "dwarfs/categorizer.h"
|
||||
#include "dwarfs/compiler.h"
|
||||
#include "dwarfs/compression_metadata_requirements.h"
|
||||
#include "dwarfs/error.h"
|
||||
#include "dwarfs/logger.h"
|
||||
|
||||
@ -45,9 +48,21 @@ constexpr std::string_view const DEFAULT_CATEGORY{"<default>"};
|
||||
|
||||
}
|
||||
|
||||
std::string
|
||||
categorizer::category_metadata(std::string_view, fragment_category) const {
|
||||
return std::string();
|
||||
}
|
||||
|
||||
void categorizer::set_metadata_requirements(std::string_view,
|
||||
std::string requirements) {
|
||||
if (!requirements.empty()) {
|
||||
compression_metadata_requirements().parse(folly::parseJson(requirements));
|
||||
}
|
||||
}
|
||||
|
||||
class categorizer_manager_private : public categorizer_manager::impl {
|
||||
public:
|
||||
virtual std::vector<std::shared_ptr<categorizer const>> const&
|
||||
virtual std::vector<std::shared_ptr<categorizer>> const&
|
||||
categorizers() const = 0;
|
||||
virtual fragment_category::value_type
|
||||
category(std::string_view cat) const = 0;
|
||||
@ -100,7 +115,7 @@ void categorizer_job_<LoggerPolicy>::categorize_random_access(
|
||||
bool global_best = true;
|
||||
|
||||
for (auto&& [index, cat] : folly::enumerate(mgr_.categorizers())) {
|
||||
if (auto p = dynamic_cast<random_access_categorizer const*>(cat.get())) {
|
||||
if (auto p = dynamic_cast<random_access_categorizer*>(cat.get())) {
|
||||
if (auto c = p->categorize(path_, data, cat_mapper_)) {
|
||||
best_ = c;
|
||||
index_ = index;
|
||||
@ -126,7 +141,7 @@ void categorizer_job_<LoggerPolicy>::categorize_sequential(
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto p = dynamic_cast<sequential_categorizer const*>(cat.get())) {
|
||||
if (auto p = dynamic_cast<sequential_categorizer*>(cat.get())) {
|
||||
if (auto job = p->job(path_, total_size_, cat_mapper_)) {
|
||||
seq_jobs_.emplace_back(index, std::move(job));
|
||||
}
|
||||
@ -180,7 +195,7 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
||||
add_category(DEFAULT_CATEGORY, std::numeric_limits<size_t>::max());
|
||||
}
|
||||
|
||||
void add(std::shared_ptr<categorizer const> c) override;
|
||||
void add(std::shared_ptr<categorizer> c) override;
|
||||
categorizer_job job(std::filesystem::path const& path) const override;
|
||||
std::string_view
|
||||
category_name(fragment_category::value_type c) const override;
|
||||
@ -194,12 +209,12 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
||||
return rv;
|
||||
}
|
||||
|
||||
folly::dynamic category_metadata(fragment_category c) const override;
|
||||
std::string category_metadata(fragment_category c) const override;
|
||||
|
||||
folly::dynamic
|
||||
category_metadata_sample(fragment_category::value_type c) const override;
|
||||
void set_metadata_requirements(fragment_category::value_type c,
|
||||
std::string req) override;
|
||||
|
||||
std::vector<std::shared_ptr<categorizer const>> const&
|
||||
std::vector<std::shared_ptr<categorizer>> const&
|
||||
categorizers() const override {
|
||||
return categorizers_;
|
||||
}
|
||||
@ -211,8 +226,6 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
||||
}
|
||||
|
||||
private:
|
||||
folly::dynamic category_metadata_impl(fragment_category c, bool sample) const;
|
||||
|
||||
void add_category(std::string_view cat, size_t categorizer_index) {
|
||||
if (catmap_.emplace(cat, categories_.size()).second) {
|
||||
categories_.emplace_back(cat, categorizer_index);
|
||||
@ -223,7 +236,7 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
||||
|
||||
logger& lgr_;
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
std::vector<std::shared_ptr<categorizer const>> categorizers_;
|
||||
std::vector<std::shared_ptr<categorizer>> categorizers_;
|
||||
// TODO: category descriptions?
|
||||
std::vector<std::pair<std::string_view, size_t>> categories_;
|
||||
std::unordered_map<std::string_view, fragment_category::value_type> catmap_;
|
||||
@ -234,8 +247,7 @@ fragment_category categorizer_manager::default_category() {
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void categorizer_manager_<LoggerPolicy>::add(
|
||||
std::shared_ptr<categorizer const> c) {
|
||||
void categorizer_manager_<LoggerPolicy>::add(std::shared_ptr<categorizer> c) {
|
||||
for (auto const& c : c->categories()) {
|
||||
add_category(c, categorizers_.size());
|
||||
}
|
||||
@ -258,34 +270,25 @@ std::string_view categorizer_manager_<LoggerPolicy>::category_name(
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
folly::dynamic
|
||||
categorizer_manager_<LoggerPolicy>::category_metadata_impl(fragment_category c,
|
||||
bool sample) const {
|
||||
std::string categorizer_manager_<LoggerPolicy>::category_metadata(
|
||||
fragment_category c) const {
|
||||
if (c.value() == 0) {
|
||||
return folly::dynamic();
|
||||
return std::string();
|
||||
}
|
||||
|
||||
auto cat = DWARFS_NOTHROW(categories_.at(c.value()));
|
||||
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
|
||||
std::optional<fragment_category> maybe_category;
|
||||
|
||||
if (!sample) {
|
||||
maybe_category.emplace(c);
|
||||
}
|
||||
|
||||
return categorizer->category_metadata(cat.first, maybe_category);
|
||||
return categorizer->category_metadata(cat.first, c);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
folly::dynamic categorizer_manager_<LoggerPolicy>::category_metadata(
|
||||
fragment_category c) const {
|
||||
return category_metadata_impl(c, false);
|
||||
}
|
||||
void categorizer_manager_<LoggerPolicy>::set_metadata_requirements(
|
||||
fragment_category::value_type c, std::string req) {
|
||||
auto cat = DWARFS_NOTHROW(categories_.at(c));
|
||||
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
folly::dynamic categorizer_manager_<LoggerPolicy>::category_metadata_sample(
|
||||
fragment_category::value_type c) const {
|
||||
return category_metadata_impl(fragment_category(c), true);
|
||||
categorizer->set_metadata_requirements(cat.first, req);
|
||||
}
|
||||
|
||||
categorizer_manager::categorizer_manager(logger& lgr)
|
||||
|
@ -64,12 +64,6 @@ class binary_categorizer_ final : public binary_categorizer_base {
|
||||
|
||||
bool is_single_fragment() const override { return false; }
|
||||
|
||||
folly::dynamic
|
||||
category_metadata(std::string_view,
|
||||
std::optional<fragment_category>) const override {
|
||||
return folly::dynamic();
|
||||
}
|
||||
|
||||
private:
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
};
|
||||
|
@ -166,12 +166,6 @@ class incompressible_categorizer_ final : public sequential_categorizer {
|
||||
|
||||
bool is_single_fragment() const override { return true; }
|
||||
|
||||
folly::dynamic
|
||||
category_metadata(std::string_view,
|
||||
std::optional<fragment_category>) const override {
|
||||
return folly::dynamic();
|
||||
}
|
||||
|
||||
private:
|
||||
logger& lgr_;
|
||||
incompressible_categorizer_config const config_;
|
||||
|
@ -149,12 +149,6 @@ class libmagic_categorizer_ final : public libmagic_categorizer_base {
|
||||
|
||||
bool is_single_fragment() const override { return true; }
|
||||
|
||||
folly::dynamic
|
||||
category_metadata(std::string_view,
|
||||
std::optional<fragment_category>) const override {
|
||||
return folly::dynamic();
|
||||
}
|
||||
|
||||
private:
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
magic_wrapper m_;
|
||||
|
@ -30,13 +30,17 @@
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include <folly/Synchronized.h>
|
||||
#include <folly/json.h>
|
||||
#include <folly/lang/Bits.h>
|
||||
|
||||
#include "dwarfs/categorizer.h"
|
||||
#include "dwarfs/compression_metadata_requirements.h"
|
||||
#include "dwarfs/error.h"
|
||||
#include "dwarfs/logger.h"
|
||||
#include "dwarfs/map_util.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -46,7 +50,7 @@ namespace po = boost::program_options;
|
||||
namespace {
|
||||
|
||||
constexpr std::string_view const METADATA_CATEGORY{"pcmaudio/metadata"};
|
||||
constexpr std::string_view const PCMAUDIO_CATEGORY{"pcmaudio/waveform"};
|
||||
constexpr std::string_view const WAVEFORM_CATEGORY{"pcmaudio/waveform"};
|
||||
|
||||
constexpr size_t const MIN_PCMAUDIO_SIZE{32};
|
||||
|
||||
@ -65,33 +69,97 @@ enum class padding : uint8_t {
|
||||
MSB,
|
||||
};
|
||||
|
||||
char const* endianness_string(endianness e) {
|
||||
std::ostream& operator<<(std::ostream& os, endianness e) {
|
||||
switch (e) {
|
||||
case endianness::BIG:
|
||||
return "big";
|
||||
os << "big";
|
||||
break;
|
||||
case endianness::LITTLE:
|
||||
return "little";
|
||||
os << "little";
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("internal error: unhandled endianness value");
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
char const* signedness_string(signedness s) {
|
||||
switch (s) {
|
||||
std::optional<endianness> parse_endianness(std::string_view e) {
|
||||
static std::unordered_map<std::string_view, endianness> const lookup{
|
||||
{"big", endianness::BIG},
|
||||
{"little", endianness::LITTLE},
|
||||
};
|
||||
return get_optional(lookup, e);
|
||||
}
|
||||
|
||||
// folly::dynamic adapter for parse_endianness(): converts `e` to a string
// with asString() and parses that.
std::optional<endianness> parse_endianness_dyn(folly::dynamic const& e) {
  auto const text = e.asString();
  return parse_endianness(text);
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, signedness e) {
|
||||
switch (e) {
|
||||
case signedness::SIGNED:
|
||||
return "signed";
|
||||
os << "signed";
|
||||
break;
|
||||
case signedness::UNSIGNED:
|
||||
return "unsigned";
|
||||
os << "unsigned";
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("internal error: unhandled signedness value");
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
char const* padding_string(padding p) {
|
||||
switch (p) {
|
||||
case padding::LSB:
|
||||
return "lsb";
|
||||
case padding::MSB:
|
||||
return "msb";
|
||||
}
|
||||
std::optional<signedness> parse_signedness(std::string_view s) {
|
||||
static std::unordered_map<std::string_view, signedness> const lookup{
|
||||
{"signed", signedness::SIGNED},
|
||||
{"unsigned", signedness::UNSIGNED},
|
||||
};
|
||||
return get_optional(lookup, s);
|
||||
}
|
||||
|
||||
// folly::dynamic adapter for parse_signedness(): converts `s` to a string
// with asString() and parses that.
std::optional<signedness> parse_signedness_dyn(folly::dynamic const& s) {
  auto const text = s.asString();
  return parse_signedness(text);
}
|
||||
|
||||
// Streams the textual form of a padding value ("lsb" or "msb").
// Throws std::runtime_error, without writing anything, for any other value.
std::ostream& operator<<(std::ostream& os, padding e) {
  if (e == padding::LSB) {
    return os << "lsb";
  }
  if (e == padding::MSB) {
    return os << "msb";
  }
  throw std::runtime_error("internal error: unhandled padding value");
}
|
||||
|
||||
std::optional<padding> parse_padding(std::string_view p) {
|
||||
static std::unordered_map<std::string_view, padding> const lookup{
|
||||
{"lsb", padding::LSB},
|
||||
{"msb", padding::MSB},
|
||||
};
|
||||
return get_optional(lookup, p);
|
||||
}
|
||||
|
||||
// folly::dynamic adapter for parse_padding(): converts `p` to a string with
// asString() and parses that.
std::optional<padding> parse_padding_dyn(folly::dynamic const& p) {
  auto const text = p.asString();
  return parse_padding(text);
}
|
||||
|
||||
} // namespace
|
||||
} // namespace dwarfs
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<dwarfs::endianness> : ostream_formatter {};
|
||||
template <>
|
||||
struct fmt::formatter<dwarfs::signedness> : ostream_formatter {};
|
||||
template <>
|
||||
struct fmt::formatter<dwarfs::padding> : ostream_formatter {};
|
||||
|
||||
namespace dwarfs {
|
||||
namespace {
|
||||
|
||||
struct pcmaudio_metadata {
|
||||
endianness sample_endianness;
|
||||
signedness sample_signedness;
|
||||
@ -325,9 +393,8 @@ class iff_parser final {
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, pcmaudio_metadata const& m) {
|
||||
os << "[" << endianness_string(m.sample_endianness) << ", "
|
||||
<< signedness_string(m.sample_signedness) << ", "
|
||||
<< padding_string(m.sample_padding) << ", "
|
||||
os << "[" << m.sample_endianness << ", " << m.sample_signedness << ", "
|
||||
<< m.sample_padding << ", "
|
||||
<< "bits=" << static_cast<int>(m.bits_per_sample) << ", "
|
||||
<< "bytes=" << static_cast<int>(m.bytes_per_sample) << ", "
|
||||
<< "channels=" << static_cast<int>(m.number_of_channels) << "]";
|
||||
@ -349,27 +416,16 @@ class pcmaudio_metadata_store {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
folly::dynamic lookup(size_t ix) const {
|
||||
std::string lookup(size_t ix) const {
|
||||
auto const& m = DWARFS_NOTHROW(forward_index_.at(ix));
|
||||
folly::dynamic obj = folly::dynamic::object;
|
||||
obj.insert("endianness", endianness_string(m.sample_endianness));
|
||||
obj.insert("signedness", signedness_string(m.sample_signedness));
|
||||
obj.insert("padding", padding_string(m.sample_padding));
|
||||
obj.insert("endianness", fmt::format("{}", m.sample_endianness));
|
||||
obj.insert("signedness", fmt::format("{}", m.sample_signedness));
|
||||
obj.insert("padding", fmt::format("{}", m.sample_padding));
|
||||
obj.insert("bytes_per_sample", m.bytes_per_sample);
|
||||
obj.insert("bits_per_sample", m.bits_per_sample);
|
||||
obj.insert("number_of_channels", m.number_of_channels);
|
||||
return obj;
|
||||
}
|
||||
|
||||
static folly::dynamic sample() {
|
||||
folly::dynamic obj = folly::dynamic::object;
|
||||
obj.insert("endianness", endianness_string(endianness::BIG));
|
||||
obj.insert("signedness", signedness_string(signedness::SIGNED));
|
||||
obj.insert("padding", padding_string(padding::LSB));
|
||||
obj.insert("bytes_per_sample", 2);
|
||||
obj.insert("bits_per_sample", 16);
|
||||
obj.insert("number_of_channels", 2);
|
||||
return obj;
|
||||
return folly::toJson(obj);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -386,7 +442,20 @@ template <typename LoggerPolicy>
|
||||
class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
||||
public:
|
||||
pcmaudio_categorizer_(logger& lgr)
|
||||
: LOG_PROXY_INIT(lgr) {}
|
||||
: LOG_PROXY_INIT(lgr) {
|
||||
waveform_req_.add_set("endianness", &pcmaudio_metadata::sample_endianness,
|
||||
parse_endianness_dyn);
|
||||
waveform_req_.add_set("signedness", &pcmaudio_metadata::sample_signedness,
|
||||
parse_signedness_dyn);
|
||||
waveform_req_.add_set("padding", &pcmaudio_metadata::sample_padding,
|
||||
parse_padding_dyn);
|
||||
waveform_req_.add_range<int>("bytes_per_sample",
|
||||
&pcmaudio_metadata::bytes_per_sample);
|
||||
waveform_req_.add_range<int>("bits_per_sample",
|
||||
&pcmaudio_metadata::bits_per_sample);
|
||||
waveform_req_.add_range<int>("number_of_channels",
|
||||
&pcmaudio_metadata::number_of_channels);
|
||||
}
|
||||
|
||||
inode_fragments
|
||||
categorize(fs::path const& path, std::span<uint8_t const> data,
|
||||
@ -394,21 +463,19 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
||||
|
||||
bool is_single_fragment() const override { return false; }
|
||||
|
||||
folly::dynamic
|
||||
category_metadata(std::string_view category_name,
|
||||
std::optional<fragment_category> c) const override {
|
||||
if (category_name == PCMAUDIO_CATEGORY) {
|
||||
if (c) {
|
||||
DWARFS_CHECK(c->has_subcategory(),
|
||||
"expected PCMAUDIO to have subcategory");
|
||||
return meta_.rlock()->lookup(c->subcategory());
|
||||
} else {
|
||||
return pcmaudio_metadata_store::sample();
|
||||
}
|
||||
std::string category_metadata(std::string_view category_name,
|
||||
fragment_category c) const override {
|
||||
if (category_name == WAVEFORM_CATEGORY) {
|
||||
DWARFS_CHECK(c.has_subcategory(),
|
||||
"expected PCMAUDIO to have subcategory");
|
||||
return meta_.rlock()->lookup(c.subcategory());
|
||||
}
|
||||
return folly::dynamic();
|
||||
return std::string();
|
||||
}
|
||||
|
||||
void set_metadata_requirements(std::string_view category_name,
|
||||
std::string requirements) override;
|
||||
|
||||
private:
|
||||
bool check_aiff(inode_fragments& frag, fs::path const& path,
|
||||
std::span<uint8_t const> data,
|
||||
@ -428,15 +495,20 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
||||
std::span<uint8_t const> data,
|
||||
category_mapper const& mapper) const;
|
||||
|
||||
bool check_metadata_requirements(pcmaudio_metadata const& meta,
|
||||
std::string_view context,
|
||||
fs::path const& path) const;
|
||||
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
folly::Synchronized<pcmaudio_metadata_store> mutable meta_;
|
||||
compression_metadata_requirements<pcmaudio_metadata> waveform_req_;
|
||||
};
|
||||
|
||||
std::span<std::string_view const>
|
||||
pcmaudio_categorizer_base::categories() const {
|
||||
static constexpr std::array const s_categories{
|
||||
METADATA_CATEGORY,
|
||||
PCMAUDIO_CATEGORY,
|
||||
WAVEFORM_CATEGORY,
|
||||
};
|
||||
return s_categories;
|
||||
}
|
||||
@ -517,6 +589,10 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_aiff(
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!check_metadata_requirements(meta, "AIFF", path)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
meta_valid = true;
|
||||
|
||||
LOG_TRACE << "[AIFF] " << path << ": meta=" << meta;
|
||||
@ -553,7 +629,7 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_aiff(
|
||||
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||
pcm_start);
|
||||
frag.emplace_back(
|
||||
fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory),
|
||||
fragment_category(mapper(WAVEFORM_CATEGORY), subcategory),
|
||||
pcm_length);
|
||||
|
||||
if (pcm_start + pcm_length < data.size()) {
|
||||
@ -710,6 +786,10 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_caf(
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!check_metadata_requirements(meta, "CAF", path)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
meta_valid = true;
|
||||
|
||||
LOG_TRACE << "[CAF] " << path << ": meta=" << meta;
|
||||
@ -736,7 +816,7 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_caf(
|
||||
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||
pcm_start);
|
||||
frag.emplace_back(
|
||||
fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory),
|
||||
fragment_category(mapper(WAVEFORM_CATEGORY), subcategory),
|
||||
pcm_length);
|
||||
|
||||
if (pcm_start + pcm_length < data.size()) {
|
||||
@ -885,6 +965,10 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_wav_like(
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!check_metadata_requirements(meta, FormatPolicy::format_name, path)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
meta_valid = true;
|
||||
|
||||
LOG_TRACE << "[" << FormatPolicy::format_name << "] " << path
|
||||
@ -912,7 +996,7 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_wav_like(
|
||||
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||
pcm_start);
|
||||
frag.emplace_back(
|
||||
fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory),
|
||||
fragment_category(mapper(WAVEFORM_CATEGORY), subcategory),
|
||||
pcm_length);
|
||||
|
||||
if (pcm_start + pcm_length < data.size()) {
|
||||
@ -927,6 +1011,20 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_wav_like(
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
bool pcmaudio_categorizer_<LoggerPolicy>::check_metadata_requirements(
|
||||
pcmaudio_metadata const& meta, std::string_view context,
|
||||
fs::path const& path) const {
|
||||
try {
|
||||
waveform_req_.check(meta);
|
||||
} catch (std::exception const& e) {
|
||||
LOG_WARN << "[" << context << "] " << path << ": " << e.what();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
||||
fs::path const& path, std::span<uint8_t const> data,
|
||||
@ -954,6 +1052,19 @@ inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
||||
return fragments;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void pcmaudio_categorizer_<LoggerPolicy>::set_metadata_requirements(
|
||||
std::string_view category_name, std::string requirements) {
|
||||
if (!requirements.empty()) {
|
||||
auto req = folly::parseJson(requirements);
|
||||
if (category_name == WAVEFORM_CATEGORY) {
|
||||
waveform_req_.parse(req);
|
||||
} else {
|
||||
compression_metadata_requirements().parse(req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class pcmaudio_categorizer_factory : public categorizer_factory {
|
||||
public:
|
||||
std::string_view name() const override { return "pcmaudio"; }
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/String.h>
|
||||
|
||||
#include "dwarfs/categorizer.h"
|
||||
#include "dwarfs/category_parser.h"
|
||||
|
||||
|
@ -49,8 +49,9 @@ class brotli_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<brotli_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic /*meta*/) const override {
|
||||
std::vector<uint8_t>
|
||||
compress(const std::vector<uint8_t>& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
std::vector<uint8_t> compressed;
|
||||
compressed.resize(folly::kMaxVarintLength64 +
|
||||
::BrotliEncoderMaxCompressedSize(data.size()));
|
||||
@ -69,9 +70,9 @@ class brotli_block_compressor final : public block_compressor::impl {
|
||||
return compressed;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
||||
return compress(data, std::move(meta));
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||
std::string const* metadata) const override {
|
||||
return compress(data, metadata);
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::BROTLI; }
|
||||
@ -80,7 +81,7 @@ class brotli_block_compressor final : public block_compressor::impl {
|
||||
return fmt::format("brotli [quality={}, lgwin={}]", quality_, window_bits_);
|
||||
}
|
||||
|
||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
||||
std::string metadata_requirements() const override { return std::string(); }
|
||||
|
||||
private:
|
||||
uint32_t const quality_;
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/Varint.h>
|
||||
#include <folly/json.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
#include "dwarfs/compression.h"
|
||||
@ -204,7 +205,14 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic meta) const override {
|
||||
std::string const* metadata) const override {
|
||||
if (!metadata) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
"internal error: flac compression requires metadata");
|
||||
}
|
||||
|
||||
auto meta = folly::parseJson(*metadata);
|
||||
|
||||
auto endianness = meta["endianness"].asString();
|
||||
auto signedness = meta["signedness"].asString();
|
||||
auto padding = meta["padding"].asString();
|
||||
@ -332,9 +340,9 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
return compressed;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
||||
return compress(data, std::move(meta));
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||
std::string const* metadata) const override {
|
||||
return compress(data, metadata);
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::FLAC; }
|
||||
@ -344,15 +352,20 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
exhaustive_ ? ", exhaustive" : "");
|
||||
}
|
||||
|
||||
bool check_metadata(folly::dynamic meta) const override {
|
||||
if (meta.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return meta.count("endianness") > 0 && meta.count("signedness") > 0 &&
|
||||
meta.count("padding") > 0 && meta.count("bytes_per_sample") > 0 &&
|
||||
meta.count("bits_per_sample") > 0 &&
|
||||
meta.count("number_of_channels") > 0;
|
||||
std::string metadata_requirements() const override {
|
||||
folly::dynamic req = folly::dynamic::object
|
||||
// clang-format off
|
||||
("endianness", folly::dynamic::array("set",
|
||||
folly::dynamic::array("big", "little")))
|
||||
("signedness", folly::dynamic::array("set",
|
||||
folly::dynamic::array("signed", "unsigned")))
|
||||
("padding", folly::dynamic::array("set",
|
||||
folly::dynamic::array("msb", "lsb")))
|
||||
("bytes_per_sample", folly::dynamic::array("range", 1, 4))
|
||||
("bits_per_sample", folly::dynamic::array("range", 8, 32))
|
||||
("number_of_channels", folly::dynamic::array("range", 1, 8))
|
||||
; // clang-format on
|
||||
return folly::toJson(req);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -66,8 +66,9 @@ class lz4_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<lz4_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic /*meta*/) const override {
|
||||
std::vector<uint8_t>
|
||||
compress(const std::vector<uint8_t>& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
std::vector<uint8_t> compressed(
|
||||
sizeof(uint32_t) + LZ4_compressBound(folly::to<int>(data.size())));
|
||||
*reinterpret_cast<uint32_t*>(&compressed[0]) = data.size();
|
||||
@ -84,16 +85,16 @@ class lz4_block_compressor final : public block_compressor::impl {
|
||||
return compressed;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
||||
return compress(data, std::move(meta));
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||
std::string const* metadata) const override {
|
||||
return compress(data, metadata);
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::LZ4; }
|
||||
|
||||
std::string describe() const override { return Policy::describe(level_); }
|
||||
|
||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
||||
std::string metadata_requirements() const override { return std::string(); }
|
||||
|
||||
private:
|
||||
const int level_;
|
||||
|
@ -64,17 +64,17 @@ class lzma_block_compressor final : public block_compressor::impl {
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic meta) const override;
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
||||
return compress(data, std::move(meta));
|
||||
std::string const* metadata) const override;
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||
std::string const* metadata) const override {
|
||||
return compress(data, metadata);
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::LZMA; }
|
||||
|
||||
std::string describe() const override { return description_; }
|
||||
|
||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
||||
std::string metadata_requirements() const override { return std::string(); }
|
||||
|
||||
private:
|
||||
std::vector<uint8_t>
|
||||
@ -178,7 +178,7 @@ lzma_block_compressor::compress(const std::vector<uint8_t>& data,
|
||||
|
||||
std::vector<uint8_t>
|
||||
lzma_block_compressor::compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic /*meta*/) const {
|
||||
std::string const* /*metadata*/) const {
|
||||
std::vector<uint8_t> best = compress(data, &filters_[1]);
|
||||
|
||||
if (filters_[0].id != LZMA_VLI_UNKNOWN) {
|
||||
|
@ -37,13 +37,15 @@ class null_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<null_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic /*meta*/) const override {
|
||||
std::vector<uint8_t>
|
||||
compress(const std::vector<uint8_t>& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
return data;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||
folly::dynamic /*meta*/) const override {
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
return std::move(data);
|
||||
}
|
||||
|
||||
@ -51,7 +53,7 @@ class null_block_compressor final : public block_compressor::impl {
|
||||
|
||||
std::string describe() const override { return "null"; }
|
||||
|
||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
||||
std::string metadata_requirements() const override { return std::string(); }
|
||||
};
|
||||
|
||||
class null_block_decompressor final : public block_decompressor::impl {
|
||||
|
@ -55,11 +55,11 @@ class zstd_block_compressor final : public block_compressor::impl {
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic meta) const override;
|
||||
std::string const* metadata) const override;
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
||||
return compress(data, std::move(meta));
|
||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||
std::string const* metadata) const override {
|
||||
return compress(data, std::move(metadata));
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::ZSTD; }
|
||||
@ -68,7 +68,7 @@ class zstd_block_compressor final : public block_compressor::impl {
|
||||
return fmt::format("zstd [level={}]", level_);
|
||||
}
|
||||
|
||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
||||
std::string metadata_requirements() const override { return std::string(); }
|
||||
|
||||
private:
|
||||
class scoped_context;
|
||||
@ -147,7 +147,7 @@ std::weak_ptr<zstd_block_compressor::context_manager>
|
||||
|
||||
std::vector<uint8_t>
|
||||
zstd_block_compressor::compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic /*meta*/) const {
|
||||
std::string const* /*metadata*/) const {
|
||||
std::vector<uint8_t> compressed(ZSTD_compressBound(data.size()));
|
||||
scoped_context ctx(*ctxmgr_);
|
||||
auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(),
|
||||
|
63
src/dwarfs/compression_metadata_requirements.cpp
Normal file
63
src/dwarfs/compression_metadata_requirements.cpp
Normal file
@ -0,0 +1,63 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "dwarfs/compression_metadata_requirements.h"
|
||||
|
||||
namespace dwarfs::detail {
|
||||
|
||||
/**
 * Validate the common shape of a single requirement value.
 *
 * The value must be a non-empty array whose first element, converted to a
 * string, equals `expected_type`, and whose total number of elements equals
 * `expected_size`. The checks run in exactly that order, so the first
 * violated condition determines the error message.
 *
 * \param dyn            Requirement value to inspect.
 * \param expected_type  Required string value of the first array element.
 * \param expected_size  Required number of array elements.
 * \param name           Requirement name, used only in error messages.
 * \throws std::runtime_error if any of the conditions above is violated.
 */
void check_dynamic_common(folly::dynamic const& dyn,
                          std::string_view expected_type, size_t expected_size,
                          std::string_view name) {
  if (!dyn.isArray()) {
    throw std::runtime_error(
        fmt::format("found non-array type for requirement '{}'", name));
  }

  if (dyn.empty()) {
    throw std::runtime_error(
        fmt::format("unexpected empty value for requirement '{}'", name));
  }

  // The leading element is the type tag of the requirement.
  auto const tag = dyn[0].asString();

  if (tag != expected_type) {
    throw std::runtime_error(
        fmt::format("invalid type '{}' for requirement '{}', expected '{}'",
                    tag, name, expected_type));
  }

  auto const actual_size = dyn.size();

  if (actual_size != expected_size) {
    throw std::runtime_error(
        fmt::format("unexpected size '{}' for requirement '{}', expected {}",
                    actual_size, name, expected_size));
  }
}
|
||||
|
||||
/**
 * Reject a requirements object that still contains entries.
 *
 * Does nothing if `req` is empty. Otherwise collects all keys of `req`,
 * sorts them so the message is deterministic, and throws.
 *
 * NOTE(review): presumably callers remove every requirement they understand
 * before calling this, so that whatever is left is unsupported -- confirm
 * against the callers in compression_metadata_requirements.h.
 *
 * \param req  Requirements object to inspect; it is not modified here.
 * \throws std::runtime_error listing the comma-separated, sorted keys if
 *         `req` is not empty.
 */
void check_unsupported_metadata_requirements(folly::dynamic& req) {
  if (req.empty()) {
    return;
  }

  std::vector<std::string> keys;
  keys.reserve(req.size());

  // Bind by const reference: iterating by value would copy each
  // folly::dynamic key just to convert it to a string.
  for (auto const& key : req.keys()) {
    keys.emplace_back(key.asString());
  }

  std::sort(keys.begin(), keys.end());

  throw std::runtime_error(fmt::format(
      "unsupported metadata requirements: {}", folly::join(", ", keys)));
}
|
||||
|
||||
} // namespace dwarfs::detail
|
@ -102,8 +102,7 @@ class raw_fsblock : public fsblock::impl {
|
||||
wg.add_job([this, prom = std::move(prom)]() mutable {
|
||||
try {
|
||||
// TODO: metadata
|
||||
auto tmp = std::make_shared<block_data>(
|
||||
bc_.compress(data_->vec(), folly::dynamic()));
|
||||
auto tmp = std::make_shared<block_data>(bc_.compress(data_->vec()));
|
||||
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
|
@ -1025,11 +1025,13 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
||||
|
||||
compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr](
|
||||
auto cat, block_compressor const& bc) {
|
||||
if (!bc.check_metadata(catmgr->category_metadata_sample(cat))) {
|
||||
try {
|
||||
catmgr->set_metadata_requirements(cat, bc.metadata_requirements());
|
||||
} catch (std::exception const& e) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("compression '{}' cannot be used for category '{}': "
|
||||
"insufficient metadata",
|
||||
bc.describe(), catmgr->category_name(cat)));
|
||||
"metadata requirements not met ({})",
|
||||
bc.describe(), catmgr->category_name(cat), e.what()));
|
||||
}
|
||||
});
|
||||
} catch (std::exception const& e) {
|
||||
|
@ -24,6 +24,8 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <folly/json.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
#include "dwarfs/pcm_sample_transformer.h"
|
||||
|
||||
@ -148,7 +150,7 @@ TEST(flac_compressor, basic) {
|
||||
|
||||
block_compressor comp("flac");
|
||||
|
||||
auto compressed = comp.compress(data, std::move(meta));
|
||||
auto compressed = comp.compress(data, folly::toJson(meta));
|
||||
|
||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||
|
||||
@ -181,7 +183,7 @@ TEST_P(flac_param, combinations) {
|
||||
|
||||
block_compressor comp("flac");
|
||||
|
||||
auto compressed = comp.compress(data, std::move(meta));
|
||||
auto compressed = comp.compress(data, folly::toJson(meta));
|
||||
|
||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user