diff --git a/CMakeLists.txt b/CMakeLists.txt index f3e57231..85134277 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -443,6 +443,12 @@ add_library(dwarfs_compression ${LIBDWARFS_COMPRESSION_SRC}) add_library(dwarfs_categorizer ${LIBDWARFS_CATEGORIZER_SRC}) add_library(dwarfs_tool src/dwarfs/tool.cpp) +add_library(dwarfs_compression_metadata src/dwarfs/compression_metadata_requirements.cpp) + +target_link_libraries(dwarfs_compression_metadata folly) +target_link_libraries(dwarfs_categorizer dwarfs_compression_metadata) +target_link_libraries(dwarfs dwarfs_compression_metadata) + if(DWARFS_GIT_BUILD) target_include_directories(dwarfs PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/include) endif() @@ -810,7 +816,8 @@ target_link_libraries(metadata_thrift thrift_light) target_link_libraries(compression_thrift thrift_light) foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer - dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGETS}) + dwarfs_compression_metadata dwarfs_tool + ${BINARY_TARGETS} ${MAIN_TARGETS}) target_include_directories( ${tgt} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS} ${INCLUDE_DIRS} diff --git a/include/dwarfs/block_compressor.h b/include/dwarfs/block_compressor.h index 375f3d99..c3a8ab7e 100644 --- a/include/dwarfs/block_compressor.h +++ b/include/dwarfs/block_compressor.h @@ -33,8 +33,6 @@ #include #include -#include - #include "dwarfs/compression.h" namespace dwarfs { @@ -57,22 +55,30 @@ class block_compressor { block_compressor(block_compressor&& bc) = default; block_compressor& operator=(block_compressor&& rhs) = default; - std::vector - compress(std::vector const& data, folly::dynamic meta) const { - return impl_->compress(data, std::move(meta)); + std::vector compress(std::vector const& data) const { + return impl_->compress(data, nullptr); + } + + std::vector compress(std::vector&& data) const { + return impl_->compress(std::move(data), nullptr); + } + + std::vector compress(std::vector const& data, + std::string const& metadata) const { + return impl_->compress(data, &metadata); } std::vector - compress(std::vector&& data, folly::dynamic meta) const { - return impl_->compress(std::move(data), std::move(meta)); + compress(std::vector&& data, std::string const& metadata) const { + return impl_->compress(std::move(data), &metadata); } compression_type type() const { return impl_->type(); } std::string describe() const { return impl_->describe(); } - bool check_metadata(folly::dynamic meta) const { - return impl_->check_metadata(std::move(meta)); + std::string metadata_requirements() const { + return impl_->metadata_requirements(); } class impl { @@ -82,14 +88,16 @@ class block_compressor { virtual std::unique_ptr clone() const = 0; virtual std::vector - compress(const std::vector& data, folly::dynamic meta) const = 0; + compress(const std::vector& data, + std::string const* metadata) const = 0; virtual std::vector - compress(std::vector&& data, folly::dynamic meta) const = 0; + compress(std::vector&& data, + std::string const* metadata) const = 0; virtual compression_type type() const = 0; virtual std::string describe() const = 0; - virtual bool check_metadata(folly::dynamic meta) const = 0; + virtual std::string metadata_requirements() const = 0; }; private: diff --git a/include/dwarfs/categorizer.h b/include/dwarfs/categorizer.h index 86cb20b9..10d6adab 100644 --- a/include/dwarfs/categorizer.h +++ b/include/dwarfs/categorizer.h @@ -31,8 +31,6 @@ #include #include -#include - #include "dwarfs/inode_fragments.h" namespace boost::program_options { @@ -53,9 +51,10 @@ class categorizer { virtual std::span categories() const = 0; virtual bool is_single_fragment() const = 0; - virtual folly::dynamic - category_metadata(std::string_view category_name, - std::optional c) const = 0; + virtual std::string + category_metadata(std::string_view category_name, fragment_category c) const; + virtual void set_metadata_requirements(std::string_view category_name, + std::string requirements); }; class random_access_categorizer : public categorizer { @@ -128,7 +127,7 @@ class categorizer_manager { static fragment_category default_category(); - void add(std::shared_ptr c) { impl_->add(std::move(c)); } + void add(std::shared_ptr c) { impl_->add(std::move(c)); } categorizer_job job(std::filesystem::path const& path) const { return impl_->job(path); @@ -143,28 +142,28 @@ class categorizer_manager { return impl_->category_value(name); } - folly::dynamic category_metadata(fragment_category c) const { + std::string category_metadata(fragment_category c) const { return impl_->category_metadata(c); } - folly::dynamic - category_metadata_sample(fragment_category::value_type c) const { - return impl_->category_metadata_sample(c); + void + set_metadata_requirements(fragment_category::value_type c, std::string req) { + impl_->set_metadata_requirements(c, std::move(req)); } class impl { public: virtual ~impl() = default; - virtual void add(std::shared_ptr c) = 0; + virtual void add(std::shared_ptr c) = 0; virtual categorizer_job job(std::filesystem::path const& path) const = 0; virtual std::string_view category_name(fragment_category::value_type c) const = 0; virtual std::optional category_value(std::string_view name) const = 0; - virtual folly::dynamic category_metadata(fragment_category c) const = 0; - virtual folly::dynamic - category_metadata_sample(fragment_category::value_type c) const = 0; + virtual std::string category_metadata(fragment_category c) const = 0; + virtual void set_metadata_requirements(fragment_category::value_type c, + std::string req) = 0; }; private: diff --git a/include/dwarfs/compression_metadata_requirements.h b/include/dwarfs/compression_metadata_requirements.h new file mode 100644 index 00000000..c1188f50 --- /dev/null +++ b/include/dwarfs/compression_metadata_requirements.h @@ -0,0 +1,291 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace dwarfs { + +namespace detail { + +template +std::optional value_parser(folly::dynamic const& v) { + if constexpr (std::is_same_v) { + return v.asString(); + } else { + static_assert(std::is_integral_v); + return v.asInt(); + } +} + +void check_dynamic_common(folly::dynamic const& dyn, + std::string_view expected_type, size_t expected_size, + std::string_view name); + +void check_unsupported_metadata_requirements(folly::dynamic& req); + +template +bool parse_metadata_requirements_set(T& container, folly::dynamic& req, + std::string_view name, + ValueParser const& value_parser) { + if (auto it = req.find(name); it != req.items().end()) { + detail::check_dynamic_common(it->second, "set", 2, name); + + if (it->second[1].type() != folly::dynamic::ARRAY) { + throw std::runtime_error( + fmt::format("non-array type argument for requirement '{}'", name)); + } + + for (auto v : it->second[1]) { + if (auto maybe_value = value_parser(v)) { + if (!container.emplace(*maybe_value).second) { + throw std::runtime_error(fmt::format( + "duplicate value '{}' for requirement '{}'", v.asString(), name)); + } + } + } + + req.erase(it); + + return true; + } + + return false; +} + +template +bool parse_metadata_requirements_range(T& min, T& max, folly::dynamic& req, + std::string_view name, + ValueParser const& value_parser) { + if (auto it = req.find(name); it != req.items().end()) { + detail::check_dynamic_common(it->second, "range", 3, name); + + auto get_value = [&](std::string_view what, int index) { + if (auto maybe_value = value_parser(it->second[index])) { + return *maybe_value; + } + throw std::runtime_error( + fmt::format("could not parse {} value '{}' for requirement '{}'", + what, it->second[index].asString(), name)); + }; + + min = get_value("minimum", 1); + max = get_value("maximum", 2); + + if (min > max) { + throw std::runtime_error(fmt::format( + "expected minimum '{}' to be less than or equal " + "to maximum '{}' for requirement '{}'", + it->second[1].asString(), it->second[2].asString(), name)); + } + + req.erase(it); + + return true; + } + + return false; +} + +class metadata_requirement_base { + public: + virtual ~metadata_requirement_base() = default; + + metadata_requirement_base(std::string const& name) + : name_{name} {} + + virtual void parse(folly::dynamic& req) = 0; + + std::string_view name() const { return name_; } + + private: + std::string const name_; +}; + +template +class checked_metadata_requirement_base : public metadata_requirement_base { + public: + using metadata_requirement_base::metadata_requirement_base; + + virtual void check(Meta const& m) const = 0; +}; + +template +class typed_metadata_requirement_base + : public checked_metadata_requirement_base { + public: + using value_parser_type = + std::function(folly::dynamic const& v)>; + using member_ptr_type = U(Meta::*); + + typed_metadata_requirement_base(std::string const& name, member_ptr_type mp) + : checked_metadata_requirement_base(name) + , mp_{mp} + , value_parser_{detail::value_parser} {} + + typed_metadata_requirement_base(std::string const& name, member_ptr_type mp, + value_parser_type value_parser) + : checked_metadata_requirement_base(name) + , mp_{mp} + , value_parser_{value_parser} {} + + void check(Meta const& m) const override { check_value(m.*mp_); } + + value_parser_type const& value_parser() const { return value_parser_; } + + protected: + virtual void check_value(T const& value) const = 0; + + private: + member_ptr_type mp_; + value_parser_type value_parser_; +}; + +template +class metadata_requirement_set + : public typed_metadata_requirement_base { + public: + using typed_metadata_requirement_base::typed_metadata_requirement_base; + + void parse(folly::dynamic& req) override { + set_.reset(); + std::unordered_set tmp; + if (parse_metadata_requirements_set(tmp, req, this->name(), + this->value_parser())) { + set_.emplace(std::move(tmp)); + } + } + + protected: + void check_value(T const& value) const override { + if (set_ && set_->count(value) == 0) { + throw std::range_error(fmt::format("{} '{}' does not meet requirements", + this->name(), value)); + } + } + + private: + std::optional> set_; +}; + +template +class metadata_requirement_range + : public typed_metadata_requirement_base { + public: + using typed_metadata_requirement_base::typed_metadata_requirement_base; + + void parse(folly::dynamic& req) override { + range_.reset(); + T min, max; + if (parse_metadata_requirements_range(min, max, req, this->name(), + this->value_parser())) { + range_.emplace(min, max); + } + } + + protected: + void check_value(T const& value) const override { + if (range_ && (value < range_->first || value > range_->second)) { + throw std::range_error( + fmt::format("{} '{}' does not meet requirements [{}..{}]", + this->name(), value, range_->first, range_->second)); + } + } + + private: + std::optional> range_; +}; + +} // namespace detail + +template +class compression_metadata_requirements { + public: + compression_metadata_requirements() = default; + + template < + typename F, typename U, + typename T = typename std::invoke_result_t::value_type> + void add_set(std::string const& name, U(Meta::*mp), F&& value_parser) { + req_.emplace_back( + std::make_unique>( + name, mp, std::forward(value_parser))); + } + + template + void add_set(std::string const& name, U(Meta::*mp)) { + add_set(name, mp, detail::value_parser); + } + + template < + typename F, typename U, + typename T = typename std::invoke_result_t::value_type> + void add_range(std::string const& name, U(Meta::*mp), F&& value_parser) { + req_.emplace_back( + std::make_unique>( + name, mp, std::forward(value_parser))); + } + + template + void add_range(std::string const& name, U(Meta::*mp)) { + add_range(name, mp, detail::value_parser); + } + + void parse(folly::dynamic req) const { + for (auto const& r : req_) { + r->parse(req); + } + + detail::check_unsupported_metadata_requirements(req); + } + + void check(Meta const& meta) const { + for (auto const& r : req_) { + r->check(meta); + } + } + + private: + std::vector>> + req_; +}; + +template <> +class compression_metadata_requirements { + public: + void parse(folly::dynamic req) const { + detail::check_unsupported_metadata_requirements(req); + } +}; + +} // namespace dwarfs diff --git a/src/dwarfs/categorizer.cpp b/src/dwarfs/categorizer.cpp index 8f7f4d71..0c1ca9ab 100644 --- a/src/dwarfs/categorizer.cpp +++ b/src/dwarfs/categorizer.cpp @@ -26,10 +26,13 @@ #include +#include #include +#include #include "dwarfs/categorizer.h" #include "dwarfs/compiler.h" +#include "dwarfs/compression_metadata_requirements.h" #include "dwarfs/error.h" #include "dwarfs/logger.h" @@ -45,9 +48,21 @@ constexpr std::string_view const DEFAULT_CATEGORY{""}; } +std::string +categorizer::category_metadata(std::string_view, fragment_category) const { + return std::string(); +} + +void categorizer::set_metadata_requirements(std::string_view, + std::string requirements) { + if (!requirements.empty()) { + compression_metadata_requirements().parse(folly::parseJson(requirements)); + } +} + class categorizer_manager_private : public categorizer_manager::impl { public: - virtual std::vector> const& + virtual std::vector> const& categorizers() const = 0; virtual fragment_category::value_type category(std::string_view cat) const = 0; @@ -100,7 +115,7 @@ void categorizer_job_::categorize_random_access( bool global_best = true; for (auto&& [index, cat] : folly::enumerate(mgr_.categorizers())) { - if (auto p = dynamic_cast(cat.get())) { + if (auto p = dynamic_cast(cat.get())) { if (auto c = p->categorize(path_, data, cat_mapper_)) { best_ = c; index_ = index; @@ -126,7 +141,7 @@ void categorizer_job_::categorize_sequential( break; } - if (auto p = dynamic_cast(cat.get())) { + if (auto p = dynamic_cast(cat.get())) { if (auto job = p->job(path_, total_size_, cat_mapper_)) { seq_jobs_.emplace_back(index, std::move(job)); } @@ -180,7 +195,7 @@ class categorizer_manager_ final : public categorizer_manager_private { add_category(DEFAULT_CATEGORY, std::numeric_limits::max()); } - void add(std::shared_ptr c) override; + void add(std::shared_ptr c) override; categorizer_job job(std::filesystem::path const& path) const override; std::string_view category_name(fragment_category::value_type c) const override; @@ -194,12 +209,12 @@ class categorizer_manager_ final : public categorizer_manager_private { return rv; } - folly::dynamic category_metadata(fragment_category c) const override; + std::string category_metadata(fragment_category c) const override; - folly::dynamic - category_metadata_sample(fragment_category::value_type c) const override; + void set_metadata_requirements(fragment_category::value_type c, + std::string req) override; - std::vector> const& + std::vector> const& categorizers() const override { return categorizers_; } @@ -211,8 +226,6 @@ class categorizer_manager_ final : public categorizer_manager_private { } private: - folly::dynamic category_metadata_impl(fragment_category c, bool sample) const; - void add_category(std::string_view cat, size_t categorizer_index) { if (catmap_.emplace(cat, categories_.size()).second) { categories_.emplace_back(cat, categorizer_index); @@ -223,7 +236,7 @@ class categorizer_manager_ final : public categorizer_manager_private { logger& lgr_; LOG_PROXY_DECL(LoggerPolicy); - std::vector> categorizers_; + std::vector> categorizers_; // TODO: category descriptions? std::vector> categories_; std::unordered_map catmap_; @@ -234,8 +247,7 @@ fragment_category categorizer_manager::default_category() { } template -void categorizer_manager_::add( - std::shared_ptr c) { +void categorizer_manager_::add(std::shared_ptr c) { for (auto const& c : c->categories()) { add_category(c, categorizers_.size()); } @@ -258,34 +270,25 @@ std::string_view categorizer_manager_::category_name( } template -folly::dynamic -categorizer_manager_::category_metadata_impl(fragment_category c, - bool sample) const { +std::string categorizer_manager_::category_metadata( + fragment_category c) const { if (c.value() == 0) { - return folly::dynamic(); + return std::string(); } auto cat = DWARFS_NOTHROW(categories_.at(c.value())); auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second)); - std::optional maybe_category; - if (!sample) { - maybe_category.emplace(c); - } - - return categorizer->category_metadata(cat.first, maybe_category); + return categorizer->category_metadata(cat.first, c); } template -folly::dynamic categorizer_manager_::category_metadata( - fragment_category c) const { - return category_metadata_impl(c, false); -} +void categorizer_manager_::set_metadata_requirements( + fragment_category::value_type c, std::string req) { + auto cat = DWARFS_NOTHROW(categories_.at(c)); + auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second)); -template -folly::dynamic categorizer_manager_::category_metadata_sample( - fragment_category::value_type c) const { - return category_metadata_impl(fragment_category(c), true); + categorizer->set_metadata_requirements(cat.first, req); } categorizer_manager::categorizer_manager(logger& lgr) diff --git a/src/dwarfs/categorizer/binary_categorizer.cpp b/src/dwarfs/categorizer/binary_categorizer.cpp index 0754296d..73e33b65 100644 --- a/src/dwarfs/categorizer/binary_categorizer.cpp +++ b/src/dwarfs/categorizer/binary_categorizer.cpp @@ -64,12 +64,6 @@ class binary_categorizer_ final : public binary_categorizer_base { bool is_single_fragment() const override { return false; } - folly::dynamic - category_metadata(std::string_view, - std::optional) const override { - return folly::dynamic(); - } - private: LOG_PROXY_DECL(LoggerPolicy); }; diff --git a/src/dwarfs/categorizer/incompressible_categorizer.cpp b/src/dwarfs/categorizer/incompressible_categorizer.cpp index 162fb080..5af4eda4 100644 --- a/src/dwarfs/categorizer/incompressible_categorizer.cpp +++ b/src/dwarfs/categorizer/incompressible_categorizer.cpp @@ -166,12 +166,6 @@ class incompressible_categorizer_ final : public sequential_categorizer { bool is_single_fragment() const override { return true; } - folly::dynamic - category_metadata(std::string_view, - std::optional) const override { - return folly::dynamic(); - } - private: logger& lgr_; incompressible_categorizer_config const config_; diff --git a/src/dwarfs/categorizer/libmagic_categorizer.cpp b/src/dwarfs/categorizer/libmagic_categorizer.cpp index 87a0e883..f22ccd1c 100644 --- a/src/dwarfs/categorizer/libmagic_categorizer.cpp +++ b/src/dwarfs/categorizer/libmagic_categorizer.cpp @@ -149,12 +149,6 @@ class libmagic_categorizer_ final : public libmagic_categorizer_base { bool is_single_fragment() const override { return true; } - folly::dynamic - category_metadata(std::string_view, - std::optional) const override { - return folly::dynamic(); - } - private: LOG_PROXY_DECL(LoggerPolicy); magic_wrapper m_; diff --git a/src/dwarfs/categorizer/pcmaudio_categorizer.cpp b/src/dwarfs/categorizer/pcmaudio_categorizer.cpp index ef2566c2..e97d0e12 100644 --- a/src/dwarfs/categorizer/pcmaudio_categorizer.cpp +++ b/src/dwarfs/categorizer/pcmaudio_categorizer.cpp @@ -30,13 +30,17 @@ #include #include +#include #include +#include #include #include "dwarfs/categorizer.h" +#include "dwarfs/compression_metadata_requirements.h" #include "dwarfs/error.h" #include "dwarfs/logger.h" +#include "dwarfs/map_util.h" namespace dwarfs { @@ -46,7 +50,7 @@ namespace po = boost::program_options; namespace { constexpr std::string_view const METADATA_CATEGORY{"pcmaudio/metadata"}; -constexpr std::string_view const PCMAUDIO_CATEGORY{"pcmaudio/waveform"}; +constexpr std::string_view const WAVEFORM_CATEGORY{"pcmaudio/waveform"}; constexpr size_t const MIN_PCMAUDIO_SIZE{32}; @@ -65,33 +69,97 @@ enum class padding : uint8_t { MSB, }; -char const* endianness_string(endianness e) { +std::ostream& operator<<(std::ostream& os, endianness e) { switch (e) { case endianness::BIG: - return "big"; + os << "big"; + break; case endianness::LITTLE: - return "little"; + os << "little"; + break; + default: + throw std::runtime_error("internal error: unhandled endianness value"); } + return os; } -char const* signedness_string(signedness s) { - switch (s) { +std::optional parse_endianness(std::string_view e) { + static std::unordered_map const lookup{ + {"big", endianness::BIG}, + {"little", endianness::LITTLE}, + }; + return get_optional(lookup, e); +} + +std::optional parse_endianness_dyn(folly::dynamic const& e) { + return parse_endianness(e.asString()); +} + +std::ostream& operator<<(std::ostream& os, signedness e) { + switch (e) { case signedness::SIGNED: - return "signed"; + os << "signed"; + break; case signedness::UNSIGNED: - return "unsigned"; + os << "unsigned"; + break; + default: + throw std::runtime_error("internal error: unhandled signedness value"); } + return os; } -char const* padding_string(padding p) { - switch (p) { - case padding::LSB: - return "lsb"; - case padding::MSB: - return "msb"; - } +std::optional parse_signedness(std::string_view s) { + static std::unordered_map const lookup{ + {"signed", signedness::SIGNED}, + {"unsigned", signedness::UNSIGNED}, + }; + return get_optional(lookup, s); } +std::optional parse_signedness_dyn(folly::dynamic const& s) { + return parse_signedness(s.asString()); +} + +std::ostream& operator<<(std::ostream& os, padding e) { + switch (e) { + case padding::LSB: + os << "lsb"; + break; + case padding::MSB: + os << "msb"; + break; + default: + throw std::runtime_error("internal error: unhandled padding value"); + } + return os; +} + +std::optional parse_padding(std::string_view p) { + static std::unordered_map const lookup{ + {"lsb", padding::LSB}, + {"msb", padding::MSB}, + }; + return get_optional(lookup, p); +} + +std::optional parse_padding_dyn(folly::dynamic const& p) { + return parse_padding(p.asString()); +} + +} // namespace +} // namespace dwarfs + +template <> +struct fmt::formatter : ostream_formatter {}; +template <> +struct fmt::formatter : ostream_formatter {}; +template <> +struct fmt::formatter : ostream_formatter {}; + +namespace dwarfs { +namespace { + struct pcmaudio_metadata { endianness sample_endianness; signedness sample_signedness; @@ -325,9 +393,8 @@ class iff_parser final { }; std::ostream& operator<<(std::ostream& os, pcmaudio_metadata const& m) { - os << "[" << endianness_string(m.sample_endianness) << ", " - << signedness_string(m.sample_signedness) << ", " - << padding_string(m.sample_padding) << ", " + os << "[" << m.sample_endianness << ", " << m.sample_signedness << ", " + << m.sample_padding << ", " << "bits=" << static_cast(m.bits_per_sample) << ", " << "bytes=" << static_cast(m.bytes_per_sample) << ", " << "channels=" << static_cast(m.number_of_channels) << "]"; @@ -349,27 +416,16 @@ class pcmaudio_metadata_store { return it->second; } - folly::dynamic lookup(size_t ix) const { + std::string lookup(size_t ix) const { auto const& m = DWARFS_NOTHROW(forward_index_.at(ix)); folly::dynamic obj = folly::dynamic::object; - obj.insert("endianness", endianness_string(m.sample_endianness)); - obj.insert("signedness", signedness_string(m.sample_signedness)); - obj.insert("padding", padding_string(m.sample_padding)); + obj.insert("endianness", fmt::format("{}", m.sample_endianness)); + obj.insert("signedness", fmt::format("{}", m.sample_signedness)); + obj.insert("padding", fmt::format("{}", m.sample_padding)); obj.insert("bytes_per_sample", m.bytes_per_sample); obj.insert("bits_per_sample", m.bits_per_sample); obj.insert("number_of_channels", m.number_of_channels); - return obj; - } - - static folly::dynamic sample() { - folly::dynamic obj = folly::dynamic::object; - obj.insert("endianness", endianness_string(endianness::BIG)); - obj.insert("signedness", signedness_string(signedness::SIGNED)); - obj.insert("padding", padding_string(padding::LSB)); - obj.insert("bytes_per_sample", 2); - obj.insert("bits_per_sample", 16); - obj.insert("number_of_channels", 2); - return obj; + return folly::toJson(obj); } private: @@ -386,7 +442,20 @@ template class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base { public: pcmaudio_categorizer_(logger& lgr) - : LOG_PROXY_INIT(lgr) {} + : LOG_PROXY_INIT(lgr) { + waveform_req_.add_set("endianness", &pcmaudio_metadata::sample_endianness, + parse_endianness_dyn); + waveform_req_.add_set("signedness", &pcmaudio_metadata::sample_signedness, + parse_signedness_dyn); + waveform_req_.add_set("padding", &pcmaudio_metadata::sample_padding, + parse_padding_dyn); + waveform_req_.add_range("bytes_per_sample", + &pcmaudio_metadata::bytes_per_sample); + waveform_req_.add_range("bits_per_sample", + &pcmaudio_metadata::bits_per_sample); + waveform_req_.add_range("number_of_channels", + &pcmaudio_metadata::number_of_channels); + } inode_fragments categorize(fs::path const& path, std::span data, @@ -394,21 +463,19 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base { bool is_single_fragment() const override { return false; } - folly::dynamic - category_metadata(std::string_view category_name, - std::optional c) const override { - if (category_name == PCMAUDIO_CATEGORY) { - if (c) { - DWARFS_CHECK(c->has_subcategory(), - "expected PCMAUDIO to have subcategory"); - return meta_.rlock()->lookup(c->subcategory()); - } else { - return pcmaudio_metadata_store::sample(); - } + std::string category_metadata(std::string_view category_name, + fragment_category c) const override { + if (category_name == WAVEFORM_CATEGORY) { + DWARFS_CHECK(c.has_subcategory(), + "expected PCMAUDIO to have subcategory"); + return meta_.rlock()->lookup(c.subcategory()); } - return folly::dynamic(); + return std::string(); } + void set_metadata_requirements(std::string_view category_name, + std::string requirements) override; + private: bool check_aiff(inode_fragments& frag, fs::path const& path, std::span data, @@ -428,15 +495,20 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base { std::span data, category_mapper const& mapper) const; + bool check_metadata_requirements(pcmaudio_metadata const& meta, + std::string_view context, + fs::path const& path) const; + LOG_PROXY_DECL(LoggerPolicy); folly::Synchronized mutable meta_; + compression_metadata_requirements waveform_req_; }; std::span pcmaudio_categorizer_base::categories() const { static constexpr std::array const s_categories{ METADATA_CATEGORY, - PCMAUDIO_CATEGORY, + WAVEFORM_CATEGORY, }; return s_categories; } @@ -517,6 +589,10 @@ bool pcmaudio_categorizer_::check_aiff( return false; } + if (!check_metadata_requirements(meta, "AIFF", path)) { + return false; + } + meta_valid = true; LOG_TRACE << "[AIFF] " << path << ": meta=" << meta; @@ -553,7 +629,7 @@ bool pcmaudio_categorizer_::check_aiff( frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)), pcm_start); frag.emplace_back( - fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory), + fragment_category(mapper(WAVEFORM_CATEGORY), subcategory), pcm_length); if (pcm_start + pcm_length < data.size()) { @@ -710,6 +786,10 @@ bool pcmaudio_categorizer_::check_caf( return false; } + if (!check_metadata_requirements(meta, "CAF", path)) { + return false; + } + meta_valid = true; LOG_TRACE << "[CAF] " << path << ": meta=" << meta; @@ -736,7 +816,7 @@ bool pcmaudio_categorizer_::check_caf( frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)), pcm_start); frag.emplace_back( - fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory), + fragment_category(mapper(WAVEFORM_CATEGORY), subcategory), pcm_length); if (pcm_start + pcm_length < data.size()) { @@ -885,6 +965,10 @@ bool pcmaudio_categorizer_::check_wav_like( return false; } + if (!check_metadata_requirements(meta, FormatPolicy::format_name, path)) { + return false; + } + meta_valid = true; LOG_TRACE << "[" << FormatPolicy::format_name << "] " << path @@ -912,7 +996,7 @@ bool pcmaudio_categorizer_::check_wav_like( frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)), pcm_start); frag.emplace_back( - fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory), + fragment_category(mapper(WAVEFORM_CATEGORY), subcategory), pcm_length); if (pcm_start + pcm_length < data.size()) { @@ -927,6 +1011,20 @@ bool pcmaudio_categorizer_::check_wav_like( return false; } +template +bool pcmaudio_categorizer_::check_metadata_requirements( + pcmaudio_metadata const& meta, std::string_view context, + fs::path const& path) const { + try { + waveform_req_.check(meta); + } catch (std::exception const& e) { + LOG_WARN << "[" << context << "] " << path << ": " << e.what(); + return false; + } + + return true; +} + template inode_fragments pcmaudio_categorizer_::categorize( fs::path const& path, std::span data, @@ -954,6 +1052,19 @@ inode_fragments pcmaudio_categorizer_::categorize( return fragments; } +template +void pcmaudio_categorizer_::set_metadata_requirements( + std::string_view category_name, std::string requirements) { + if (!requirements.empty()) { + auto req = folly::parseJson(requirements); + if (category_name == WAVEFORM_CATEGORY) { + waveform_req_.parse(req); + } else { + compression_metadata_requirements().parse(req); + } + } +} + class pcmaudio_categorizer_factory : public categorizer_factory { public: std::string_view name() const override { return "pcmaudio"; } diff --git a/src/dwarfs/category_parser.cpp b/src/dwarfs/category_parser.cpp index 060fa020..1e4e32ba 100644 --- a/src/dwarfs/category_parser.cpp +++ b/src/dwarfs/category_parser.cpp @@ -21,6 +21,8 @@ #include +#include + #include "dwarfs/categorizer.h" #include "dwarfs/category_parser.h" diff --git a/src/dwarfs/compression/brotli.cpp b/src/dwarfs/compression/brotli.cpp index 6eaa3a6d..b4e2d7e4 100644 --- a/src/dwarfs/compression/brotli.cpp +++ b/src/dwarfs/compression/brotli.cpp @@ -49,8 +49,9 @@ class brotli_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(const std::vector& data, - folly::dynamic /*meta*/) const override { + std::vector + compress(const std::vector& data, + std::string const* /*metadata*/) const override { std::vector compressed; compressed.resize(folly::kMaxVarintLength64 + ::BrotliEncoderMaxCompressedSize(data.size())); @@ -69,9 +70,9 @@ class brotli_block_compressor final : public block_compressor::impl { return compressed; } - std::vector - compress(std::vector&& data, folly::dynamic meta) const override { - return compress(data, std::move(meta)); + std::vector compress(std::vector&& data, + std::string const* metadata) const override { + return compress(data, metadata); } compression_type type() const override { return compression_type::BROTLI; } @@ -80,7 +81,7 @@ class brotli_block_compressor final : public block_compressor::impl { return fmt::format("brotli [quality={}, lgwin={}]", quality_, window_bits_); } - bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + std::string metadata_requirements() const override { return std::string(); } private: uint32_t const quality_; diff --git a/src/dwarfs/compression/flac.cpp b/src/dwarfs/compression/flac.cpp index 6c04c0bf..d1272c66 100644 --- a/src/dwarfs/compression/flac.cpp +++ b/src/dwarfs/compression/flac.cpp @@ -31,6 +31,7 @@ #include #include +#include #include "dwarfs/block_compressor.h" #include "dwarfs/compression.h" @@ -204,7 +205,14 @@ class flac_block_compressor final : public block_compressor::impl { } std::vector compress(const std::vector& data, - folly::dynamic meta) const override { + std::string const* metadata) const override { + if (!metadata) { + DWARFS_THROW(runtime_error, + "internal error: flac compression requires metadata"); + } + + auto meta = folly::parseJson(*metadata); + auto endianness = meta["endianness"].asString(); auto signedness = meta["signedness"].asString(); auto padding = meta["padding"].asString(); @@ -332,9 +340,9 @@ class flac_block_compressor final : public block_compressor::impl { return compressed; } - std::vector - compress(std::vector&& data, folly::dynamic meta) const override { - return compress(data, std::move(meta)); + std::vector compress(std::vector&& data, + std::string const* metadata) const override { + return compress(data, metadata); } compression_type type() const override { return compression_type::FLAC; } @@ -344,15 +352,20 @@ class flac_block_compressor final : public block_compressor::impl { exhaustive_ ? ", exhaustive" : ""); } - bool check_metadata(folly::dynamic meta) const override { - if (meta.empty()) { - return false; - } - - return meta.count("endianness") > 0 && meta.count("signedness") > 0 && - meta.count("padding") > 0 && meta.count("bytes_per_sample") > 0 && - meta.count("bits_per_sample") > 0 && - meta.count("number_of_channels") > 0; + std::string metadata_requirements() const override { + folly::dynamic req = folly::dynamic::object + // clang-format off + ("endianness", folly::dynamic::array("set", + folly::dynamic::array("big", "little"))) + ("signedness", folly::dynamic::array("set", + folly::dynamic::array("signed", "unsigned"))) + ("padding", folly::dynamic::array("set", + folly::dynamic::array("msb", "lsb"))) + ("bytes_per_sample", folly::dynamic::array("range", 1, 4)) + ("bits_per_sample", folly::dynamic::array("range", 8, 32)) + ("number_of_channels", folly::dynamic::array("range", 1, 8)) + ; // clang-format on + return folly::toJson(req); } private: diff --git a/src/dwarfs/compression/lz4.cpp b/src/dwarfs/compression/lz4.cpp index 7a0c7fc0..df32a01d 100644 --- a/src/dwarfs/compression/lz4.cpp +++ b/src/dwarfs/compression/lz4.cpp @@ -66,8 +66,9 @@ class lz4_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(const std::vector& data, - folly::dynamic /*meta*/) const override { + std::vector + compress(const std::vector& data, + std::string const* /*metadata*/) const override { std::vector compressed( sizeof(uint32_t) + LZ4_compressBound(folly::to(data.size()))); *reinterpret_cast(&compressed[0]) = data.size(); @@ -84,16 +85,16 @@ class lz4_block_compressor final : public block_compressor::impl { return compressed; } - std::vector - compress(std::vector&& data, folly::dynamic meta) const override { - return compress(data, std::move(meta)); + std::vector compress(std::vector&& data, + std::string const* metadata) const override { + return compress(data, metadata); } compression_type type() const override { return compression_type::LZ4; } std::string describe() const override { return Policy::describe(level_); } - bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + std::string metadata_requirements() const override { return std::string(); } private: const int level_; diff --git a/src/dwarfs/compression/lzma.cpp b/src/dwarfs/compression/lzma.cpp index 675de239..854fe705 100644 --- a/src/dwarfs/compression/lzma.cpp +++ b/src/dwarfs/compression/lzma.cpp @@ -64,17 +64,17 @@ class lzma_block_compressor final : public block_compressor::impl { } std::vector compress(const std::vector& data, - folly::dynamic meta) const override; - std::vector - compress(std::vector&& data, folly::dynamic meta) const override { - return compress(data, std::move(meta)); + std::string const* metadata) const override; + std::vector compress(std::vector&& data, + std::string const* metadata) const override { + return compress(data, metadata); } compression_type type() const override { return compression_type::LZMA; } std::string describe() const override { return description_; } - bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + std::string metadata_requirements() const override { return std::string(); } private: std::vector @@ -178,7 +178,7 @@ lzma_block_compressor::compress(const std::vector& data, std::vector lzma_block_compressor::compress(const std::vector& data, - folly::dynamic /*meta*/) const { + std::string const* /*metadata*/) const { std::vector best = compress(data, &filters_[1]); if (filters_[0].id != LZMA_VLI_UNKNOWN) { diff --git a/src/dwarfs/compression/null.cpp b/src/dwarfs/compression/null.cpp index b473cbaf..0c96449a 100644 --- a/src/dwarfs/compression/null.cpp +++ b/src/dwarfs/compression/null.cpp @@ -37,13 +37,15 @@ class null_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(const std::vector& data, - folly::dynamic /*meta*/) const override { + std::vector + compress(const std::vector& data, + std::string const* /*metadata*/) const override { return data; } - std::vector compress(std::vector&& data, - folly::dynamic /*meta*/) const override { + std::vector + compress(std::vector&& data, + std::string const* /*metadata*/) const override { return std::move(data); } @@ -51,7 +53,7 @@ class null_block_compressor final : public block_compressor::impl { std::string describe() const override { return "null"; } - bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + std::string metadata_requirements() const override { return std::string(); } }; class null_block_decompressor final : public block_decompressor::impl { diff --git a/src/dwarfs/compression/zstd.cpp b/src/dwarfs/compression/zstd.cpp index 6eb332c6..f220c362 100644 --- a/src/dwarfs/compression/zstd.cpp +++ b/src/dwarfs/compression/zstd.cpp @@ -55,11 +55,11 @@ class zstd_block_compressor final : public block_compressor::impl { } std::vector compress(const std::vector& data, - folly::dynamic meta) const override; + std::string const* metadata) const override; - std::vector - compress(std::vector&& data, folly::dynamic meta) const override { - return compress(data, std::move(meta)); + std::vector compress(std::vector&& data, + std::string const* metadata) const override { + return compress(data, std::move(metadata)); } compression_type type() const override { return compression_type::ZSTD; } @@ -68,7 +68,7 @@ class zstd_block_compressor final : public block_compressor::impl { return fmt::format("zstd [level={}]", level_); } - bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + std::string metadata_requirements() const override { return std::string(); } private: class scoped_context; @@ -147,7 +147,7 @@ std::weak_ptr std::vector zstd_block_compressor::compress(const std::vector& data, - folly::dynamic /*meta*/) const { + std::string const* /*metadata*/) const { std::vector compressed(ZSTD_compressBound(data.size())); scoped_context ctx(*ctxmgr_); auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(), diff --git a/src/dwarfs/compression_metadata_requirements.cpp b/src/dwarfs/compression_metadata_requirements.cpp new file mode 100644 index 00000000..02b4b44a --- /dev/null +++ b/src/dwarfs/compression_metadata_requirements.cpp @@ -0,0 +1,63 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include + +#include "dwarfs/compression_metadata_requirements.h" + +namespace dwarfs::detail { + +void check_dynamic_common(folly::dynamic const& dyn, + std::string_view expected_type, size_t expected_size, + std::string_view name) { + if (dyn.type() != folly::dynamic::ARRAY) { + throw std::runtime_error( + fmt::format("found non-array type for requirement '{}'", name)); + } + if (dyn.empty()) { + throw std::runtime_error( + fmt::format("unexpected empty value for requirement '{}'", name)); + } + if (auto type = dyn[0].asString(); type != expected_type) { + throw std::runtime_error( + fmt::format("invalid type '{}' for requirement '{}', expected '{}'", + type, name, expected_type)); + } + if (dyn.size() != expected_size) { + throw std::runtime_error( + fmt::format("unexpected size '{}' for requirement '{}', expected {}", + dyn.size(), name, expected_size)); + } +} + +void check_unsupported_metadata_requirements(folly::dynamic& req) { + if (!req.empty()) { + std::vector keys; + for (auto k : req.keys()) { + keys.emplace_back(k.asString()); + } + std::sort(keys.begin(), keys.end()); + throw std::runtime_error(fmt::format( + "unsupported metadata requirements: {}", folly::join(", ", keys))); + } +} + +} // namespace dwarfs::detail diff --git a/src/dwarfs/filesystem_writer.cpp b/src/dwarfs/filesystem_writer.cpp index 8f208dfd..423d89ee 100644 --- a/src/dwarfs/filesystem_writer.cpp +++ b/src/dwarfs/filesystem_writer.cpp @@ -102,8 +102,7 @@ class raw_fsblock : public fsblock::impl { wg.add_job([this, prom = std::move(prom)]() mutable { try { // TODO: metadata - auto tmp = std::make_shared( - bc_.compress(data_->vec(), folly::dynamic())); + auto tmp = std::make_shared(bc_.compress(data_->vec())); { std::lock_guard lock(mx_); diff --git a/src/mkdwarfs_main.cpp b/src/mkdwarfs_main.cpp index 174ff463..e46dec64 100644 --- a/src/mkdwarfs_main.cpp +++ b/src/mkdwarfs_main.cpp @@ -1025,11 +1025,13 @@ int mkdwarfs_main(int argc, sys_char** argv) { compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr]( auto cat, block_compressor const& bc) { - if (!bc.check_metadata(catmgr->category_metadata_sample(cat))) { + try { + catmgr->set_metadata_requirements(cat, bc.metadata_requirements()); + } catch (std::exception const& e) { throw std::runtime_error( fmt::format("compression '{}' cannot be used for category '{}': " - "insufficient metadata", - bc.describe(), catmgr->category_name(cat))); + "metadata requirements not met ({})", + bc.describe(), catmgr->category_name(cat), e.what())); } }); } catch (std::exception const& e) { diff --git a/test/flac_compressor_test.cpp b/test/flac_compressor_test.cpp index 03dc967a..6fdc4c19 100644 --- a/test/flac_compressor_test.cpp +++ b/test/flac_compressor_test.cpp @@ -24,6 +24,8 @@ #include +#include + #include "dwarfs/block_compressor.h" #include "dwarfs/pcm_sample_transformer.h" @@ -148,7 +150,7 @@ TEST(flac_compressor, basic) { block_compressor comp("flac"); - auto compressed = comp.compress(data, std::move(meta)); + auto compressed = comp.compress(data, folly::toJson(meta)); EXPECT_LT(compressed.size(), data.size() / 2); @@ -181,7 +183,7 @@ TEST_P(flac_param, combinations) { block_compressor comp("flac"); - auto compressed = comp.compress(data, std::move(meta)); + auto compressed = comp.compress(data, folly::toJson(meta)); EXPECT_LT(compressed.size(), data.size() / 2);