mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-17 00:10:03 -04:00
Better modeling of metadata requirements
This commit is contained in:
parent
e08faf2c0c
commit
9d5969adb7
@ -443,6 +443,12 @@ add_library(dwarfs_compression ${LIBDWARFS_COMPRESSION_SRC})
|
|||||||
add_library(dwarfs_categorizer ${LIBDWARFS_CATEGORIZER_SRC})
|
add_library(dwarfs_categorizer ${LIBDWARFS_CATEGORIZER_SRC})
|
||||||
add_library(dwarfs_tool src/dwarfs/tool.cpp)
|
add_library(dwarfs_tool src/dwarfs/tool.cpp)
|
||||||
|
|
||||||
|
add_library(dwarfs_compression_metadata src/dwarfs/compression_metadata_requirements.cpp)
|
||||||
|
|
||||||
|
target_link_libraries(dwarfs_compression_metadata folly)
|
||||||
|
target_link_libraries(dwarfs_categorizer dwarfs_compression_metadata)
|
||||||
|
target_link_libraries(dwarfs dwarfs_compression_metadata)
|
||||||
|
|
||||||
if(DWARFS_GIT_BUILD)
|
if(DWARFS_GIT_BUILD)
|
||||||
target_include_directories(dwarfs PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/include)
|
target_include_directories(dwarfs PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||||
endif()
|
endif()
|
||||||
@ -810,7 +816,8 @@ target_link_libraries(metadata_thrift thrift_light)
|
|||||||
target_link_libraries(compression_thrift thrift_light)
|
target_link_libraries(compression_thrift thrift_light)
|
||||||
|
|
||||||
foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
|
foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
|
||||||
dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGETS})
|
dwarfs_compression_metadata dwarfs_tool
|
||||||
|
${BINARY_TARGETS} ${MAIN_TARGETS})
|
||||||
target_include_directories(
|
target_include_directories(
|
||||||
${tgt} SYSTEM
|
${tgt} SYSTEM
|
||||||
PRIVATE ${Boost_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS} ${INCLUDE_DIRS}
|
PRIVATE ${Boost_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS} ${INCLUDE_DIRS}
|
||||||
|
@ -33,8 +33,6 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <folly/dynamic.h>
|
|
||||||
|
|
||||||
#include "dwarfs/compression.h"
|
#include "dwarfs/compression.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -57,22 +55,30 @@ class block_compressor {
|
|||||||
block_compressor(block_compressor&& bc) = default;
|
block_compressor(block_compressor&& bc) = default;
|
||||||
block_compressor& operator=(block_compressor&& rhs) = default;
|
block_compressor& operator=(block_compressor&& rhs) = default;
|
||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t> compress(std::vector<uint8_t> const& data) const {
|
||||||
compress(std::vector<uint8_t> const& data, folly::dynamic meta) const {
|
return impl_->compress(data, nullptr);
|
||||||
return impl_->compress(data, std::move(meta));
|
}
|
||||||
|
|
||||||
|
std::vector<uint8_t> compress(std::vector<uint8_t>&& data) const {
|
||||||
|
return impl_->compress(std::move(data), nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<uint8_t> compress(std::vector<uint8_t> const& data,
|
||||||
|
std::string const& metadata) const {
|
||||||
|
return impl_->compress(data, &metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t>
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const {
|
compress(std::vector<uint8_t>&& data, std::string const& metadata) const {
|
||||||
return impl_->compress(std::move(data), std::move(meta));
|
return impl_->compress(std::move(data), &metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
compression_type type() const { return impl_->type(); }
|
compression_type type() const { return impl_->type(); }
|
||||||
|
|
||||||
std::string describe() const { return impl_->describe(); }
|
std::string describe() const { return impl_->describe(); }
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic meta) const {
|
std::string metadata_requirements() const {
|
||||||
return impl_->check_metadata(std::move(meta));
|
return impl_->metadata_requirements();
|
||||||
}
|
}
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
@ -82,14 +88,16 @@ class block_compressor {
|
|||||||
virtual std::unique_ptr<impl> clone() const = 0;
|
virtual std::unique_ptr<impl> clone() const = 0;
|
||||||
|
|
||||||
virtual std::vector<uint8_t>
|
virtual std::vector<uint8_t>
|
||||||
compress(const std::vector<uint8_t>& data, folly::dynamic meta) const = 0;
|
compress(const std::vector<uint8_t>& data,
|
||||||
|
std::string const* metadata) const = 0;
|
||||||
virtual std::vector<uint8_t>
|
virtual std::vector<uint8_t>
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const = 0;
|
compress(std::vector<uint8_t>&& data,
|
||||||
|
std::string const* metadata) const = 0;
|
||||||
|
|
||||||
virtual compression_type type() const = 0;
|
virtual compression_type type() const = 0;
|
||||||
virtual std::string describe() const = 0;
|
virtual std::string describe() const = 0;
|
||||||
|
|
||||||
virtual bool check_metadata(folly::dynamic meta) const = 0;
|
virtual std::string metadata_requirements() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -31,8 +31,6 @@
|
|||||||
#include <span>
|
#include <span>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
|
||||||
#include <folly/dynamic.h>
|
|
||||||
|
|
||||||
#include "dwarfs/inode_fragments.h"
|
#include "dwarfs/inode_fragments.h"
|
||||||
|
|
||||||
namespace boost::program_options {
|
namespace boost::program_options {
|
||||||
@ -53,9 +51,10 @@ class categorizer {
|
|||||||
|
|
||||||
virtual std::span<std::string_view const> categories() const = 0;
|
virtual std::span<std::string_view const> categories() const = 0;
|
||||||
virtual bool is_single_fragment() const = 0;
|
virtual bool is_single_fragment() const = 0;
|
||||||
virtual folly::dynamic
|
virtual std::string
|
||||||
category_metadata(std::string_view category_name,
|
category_metadata(std::string_view category_name, fragment_category c) const;
|
||||||
std::optional<fragment_category> c) const = 0;
|
virtual void set_metadata_requirements(std::string_view category_name,
|
||||||
|
std::string requirements);
|
||||||
};
|
};
|
||||||
|
|
||||||
class random_access_categorizer : public categorizer {
|
class random_access_categorizer : public categorizer {
|
||||||
@ -128,7 +127,7 @@ class categorizer_manager {
|
|||||||
|
|
||||||
static fragment_category default_category();
|
static fragment_category default_category();
|
||||||
|
|
||||||
void add(std::shared_ptr<categorizer const> c) { impl_->add(std::move(c)); }
|
void add(std::shared_ptr<categorizer> c) { impl_->add(std::move(c)); }
|
||||||
|
|
||||||
categorizer_job job(std::filesystem::path const& path) const {
|
categorizer_job job(std::filesystem::path const& path) const {
|
||||||
return impl_->job(path);
|
return impl_->job(path);
|
||||||
@ -143,28 +142,28 @@ class categorizer_manager {
|
|||||||
return impl_->category_value(name);
|
return impl_->category_value(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
folly::dynamic category_metadata(fragment_category c) const {
|
std::string category_metadata(fragment_category c) const {
|
||||||
return impl_->category_metadata(c);
|
return impl_->category_metadata(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
folly::dynamic
|
void
|
||||||
category_metadata_sample(fragment_category::value_type c) const {
|
set_metadata_requirements(fragment_category::value_type c, std::string req) {
|
||||||
return impl_->category_metadata_sample(c);
|
impl_->set_metadata_requirements(c, std::move(req));
|
||||||
}
|
}
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
public:
|
public:
|
||||||
virtual ~impl() = default;
|
virtual ~impl() = default;
|
||||||
|
|
||||||
virtual void add(std::shared_ptr<categorizer const> c) = 0;
|
virtual void add(std::shared_ptr<categorizer> c) = 0;
|
||||||
virtual categorizer_job job(std::filesystem::path const& path) const = 0;
|
virtual categorizer_job job(std::filesystem::path const& path) const = 0;
|
||||||
virtual std::string_view
|
virtual std::string_view
|
||||||
category_name(fragment_category::value_type c) const = 0;
|
category_name(fragment_category::value_type c) const = 0;
|
||||||
virtual std::optional<fragment_category::value_type>
|
virtual std::optional<fragment_category::value_type>
|
||||||
category_value(std::string_view name) const = 0;
|
category_value(std::string_view name) const = 0;
|
||||||
virtual folly::dynamic category_metadata(fragment_category c) const = 0;
|
virtual std::string category_metadata(fragment_category c) const = 0;
|
||||||
virtual folly::dynamic
|
virtual void set_metadata_requirements(fragment_category::value_type c,
|
||||||
category_metadata_sample(fragment_category::value_type c) const = 0;
|
std::string req) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
291
include/dwarfs/compression_metadata_requirements.h
Normal file
291
include/dwarfs/compression_metadata_requirements.h
Normal file
@ -0,0 +1,291 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <functional>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <vector>

#include <fmt/format.h>

#include <folly/dynamic.h>
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::optional<T> value_parser(folly::dynamic const& v) {
|
||||||
|
if constexpr (std::is_same_v<T, std::string>) {
|
||||||
|
return v.asString();
|
||||||
|
} else {
|
||||||
|
static_assert(std::is_integral_v<T>);
|
||||||
|
return v.asInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void check_dynamic_common(folly::dynamic const& dyn,
|
||||||
|
std::string_view expected_type, size_t expected_size,
|
||||||
|
std::string_view name);
|
||||||
|
|
||||||
|
void check_unsupported_metadata_requirements(folly::dynamic& req);
|
||||||
|
|
||||||
|
template <typename T, typename ValueParser>
|
||||||
|
bool parse_metadata_requirements_set(T& container, folly::dynamic& req,
|
||||||
|
std::string_view name,
|
||||||
|
ValueParser const& value_parser) {
|
||||||
|
if (auto it = req.find(name); it != req.items().end()) {
|
||||||
|
detail::check_dynamic_common(it->second, "set", 2, name);
|
||||||
|
|
||||||
|
if (it->second[1].type() != folly::dynamic::ARRAY) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
fmt::format("non-array type argument for requirement '{}'", name));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto v : it->second[1]) {
|
||||||
|
if (auto maybe_value = value_parser(v)) {
|
||||||
|
if (!container.emplace(*maybe_value).second) {
|
||||||
|
throw std::runtime_error(fmt::format(
|
||||||
|
"duplicate value '{}' for requirement '{}'", v.asString(), name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
req.erase(it);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename ValueParser>
|
||||||
|
bool parse_metadata_requirements_range(T& min, T& max, folly::dynamic& req,
|
||||||
|
std::string_view name,
|
||||||
|
ValueParser const& value_parser) {
|
||||||
|
if (auto it = req.find(name); it != req.items().end()) {
|
||||||
|
detail::check_dynamic_common(it->second, "range", 3, name);
|
||||||
|
|
||||||
|
auto get_value = [&](std::string_view what, int index) {
|
||||||
|
if (auto maybe_value = value_parser(it->second[index])) {
|
||||||
|
return *maybe_value;
|
||||||
|
}
|
||||||
|
throw std::runtime_error(
|
||||||
|
fmt::format("could not parse {} value '{}' for requirement '{}'",
|
||||||
|
what, it->second[index].asString(), name));
|
||||||
|
};
|
||||||
|
|
||||||
|
min = get_value("minimum", 1);
|
||||||
|
max = get_value("maximum", 2);
|
||||||
|
|
||||||
|
if (min > max) {
|
||||||
|
throw std::runtime_error(fmt::format(
|
||||||
|
"expected minimum '{}' to be less than or equal "
|
||||||
|
"to maximum '{}' for requirement '{}'",
|
||||||
|
it->second[1].asString(), it->second[2].asString(), name));
|
||||||
|
}
|
||||||
|
|
||||||
|
req.erase(it);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
class metadata_requirement_base {
|
||||||
|
public:
|
||||||
|
virtual ~metadata_requirement_base() = default;
|
||||||
|
|
||||||
|
metadata_requirement_base(std::string const& name)
|
||||||
|
: name_{name} {}
|
||||||
|
|
||||||
|
virtual void parse(folly::dynamic& req) = 0;
|
||||||
|
|
||||||
|
std::string_view name() const { return name_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string const name_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Meta>
|
||||||
|
class checked_metadata_requirement_base : public metadata_requirement_base {
|
||||||
|
public:
|
||||||
|
using metadata_requirement_base::metadata_requirement_base;
|
||||||
|
|
||||||
|
virtual void check(Meta const& m) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Meta, typename T, typename U>
|
||||||
|
class typed_metadata_requirement_base
|
||||||
|
: public checked_metadata_requirement_base<Meta> {
|
||||||
|
public:
|
||||||
|
using value_parser_type =
|
||||||
|
std::function<std::optional<T>(folly::dynamic const& v)>;
|
||||||
|
using member_ptr_type = U(Meta::*);
|
||||||
|
|
||||||
|
typed_metadata_requirement_base(std::string const& name, member_ptr_type mp)
|
||||||
|
: checked_metadata_requirement_base<Meta>(name)
|
||||||
|
, mp_{mp}
|
||||||
|
, value_parser_{detail::value_parser<T>} {}
|
||||||
|
|
||||||
|
typed_metadata_requirement_base(std::string const& name, member_ptr_type mp,
|
||||||
|
value_parser_type value_parser)
|
||||||
|
: checked_metadata_requirement_base<Meta>(name)
|
||||||
|
, mp_{mp}
|
||||||
|
, value_parser_{value_parser} {}
|
||||||
|
|
||||||
|
void check(Meta const& m) const override { check_value(m.*mp_); }
|
||||||
|
|
||||||
|
value_parser_type const& value_parser() const { return value_parser_; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual void check_value(T const& value) const = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
member_ptr_type mp_;
|
||||||
|
value_parser_type value_parser_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Meta, typename T, typename U = T>
|
||||||
|
class metadata_requirement_set
|
||||||
|
: public typed_metadata_requirement_base<Meta, T, U> {
|
||||||
|
public:
|
||||||
|
using typed_metadata_requirement_base<Meta, T,
|
||||||
|
U>::typed_metadata_requirement_base;
|
||||||
|
|
||||||
|
void parse(folly::dynamic& req) override {
|
||||||
|
set_.reset();
|
||||||
|
std::unordered_set<T> tmp;
|
||||||
|
if (parse_metadata_requirements_set(tmp, req, this->name(),
|
||||||
|
this->value_parser())) {
|
||||||
|
set_.emplace(std::move(tmp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void check_value(T const& value) const override {
|
||||||
|
if (set_ && set_->count(value) == 0) {
|
||||||
|
throw std::range_error(fmt::format("{} '{}' does not meet requirements",
|
||||||
|
this->name(), value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::optional<std::unordered_set<T>> set_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Meta, typename T, typename U = T>
|
||||||
|
class metadata_requirement_range
|
||||||
|
: public typed_metadata_requirement_base<Meta, T, U> {
|
||||||
|
public:
|
||||||
|
using typed_metadata_requirement_base<Meta, T,
|
||||||
|
U>::typed_metadata_requirement_base;
|
||||||
|
|
||||||
|
void parse(folly::dynamic& req) override {
|
||||||
|
range_.reset();
|
||||||
|
T min, max;
|
||||||
|
if (parse_metadata_requirements_range(min, max, req, this->name(),
|
||||||
|
this->value_parser())) {
|
||||||
|
range_.emplace(min, max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void check_value(T const& value) const override {
|
||||||
|
if (range_ && (value < range_->first || value > range_->second)) {
|
||||||
|
throw std::range_error(
|
||||||
|
fmt::format("{} '{}' does not meet requirements [{}..{}]",
|
||||||
|
this->name(), value, range_->first, range_->second));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::optional<std::pair<T, T>> range_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
template <typename Meta = void>
|
||||||
|
class compression_metadata_requirements {
|
||||||
|
public:
|
||||||
|
compression_metadata_requirements() = default;
|
||||||
|
|
||||||
|
template <
|
||||||
|
typename F, typename U,
|
||||||
|
typename T = typename std::invoke_result_t<F, folly::dynamic>::value_type>
|
||||||
|
void add_set(std::string const& name, U(Meta::*mp), F&& value_parser) {
|
||||||
|
req_.emplace_back(
|
||||||
|
std::make_unique<detail::metadata_requirement_set<Meta, T, U>>(
|
||||||
|
name, mp, std::forward<F>(value_parser)));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U>
|
||||||
|
void add_set(std::string const& name, U(Meta::*mp)) {
|
||||||
|
add_set(name, mp, detail::value_parser<T>);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <
|
||||||
|
typename F, typename U,
|
||||||
|
typename T = typename std::invoke_result_t<F, folly::dynamic>::value_type>
|
||||||
|
void add_range(std::string const& name, U(Meta::*mp), F&& value_parser) {
|
||||||
|
req_.emplace_back(
|
||||||
|
std::make_unique<detail::metadata_requirement_range<Meta, T, U>>(
|
||||||
|
name, mp, std::forward<F>(value_parser)));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U>
|
||||||
|
void add_range(std::string const& name, U(Meta::*mp)) {
|
||||||
|
add_range(name, mp, detail::value_parser<T>);
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse(folly::dynamic req) const {
|
||||||
|
for (auto const& r : req_) {
|
||||||
|
r->parse(req);
|
||||||
|
}
|
||||||
|
|
||||||
|
detail::check_unsupported_metadata_requirements(req);
|
||||||
|
}
|
||||||
|
|
||||||
|
void check(Meta const& meta) const {
|
||||||
|
for (auto const& r : req_) {
|
||||||
|
r->check(meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<std::unique_ptr<detail::checked_metadata_requirement_base<Meta>>>
|
||||||
|
req_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Specialization for `Meta = void`, the primary template's default argument.
 *
 * No requirements can be registered here; `parse()` hands the complete
 * requirements object to `check_unsupported_metadata_requirements()`.
 * NOTE(review): that function is only declared in this header; presumably
 * it rejects any entries that are present -- confirm against its definition.
 */
template <>
class compression_metadata_requirements<void> {
 public:
  /// `req` is taken by value, so the caller's object is not modified.
  void parse(folly::dynamic req) const {
    detail::check_unsupported_metadata_requirements(req);
  }
};
|
||||||
|
|
||||||
|
} // namespace dwarfs
|
@ -26,10 +26,13 @@
|
|||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
#include <folly/String.h>
|
||||||
#include <folly/container/Enumerate.h>
|
#include <folly/container/Enumerate.h>
|
||||||
|
#include <folly/json.h>
|
||||||
|
|
||||||
#include "dwarfs/categorizer.h"
|
#include "dwarfs/categorizer.h"
|
||||||
#include "dwarfs/compiler.h"
|
#include "dwarfs/compiler.h"
|
||||||
|
#include "dwarfs/compression_metadata_requirements.h"
|
||||||
#include "dwarfs/error.h"
|
#include "dwarfs/error.h"
|
||||||
#include "dwarfs/logger.h"
|
#include "dwarfs/logger.h"
|
||||||
|
|
||||||
@ -45,9 +48,21 @@ constexpr std::string_view const DEFAULT_CATEGORY{"<default>"};
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
categorizer::category_metadata(std::string_view, fragment_category) const {
|
||||||
|
return std::string();
|
||||||
|
}
|
||||||
|
|
||||||
|
void categorizer::set_metadata_requirements(std::string_view,
|
||||||
|
std::string requirements) {
|
||||||
|
if (!requirements.empty()) {
|
||||||
|
compression_metadata_requirements().parse(folly::parseJson(requirements));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class categorizer_manager_private : public categorizer_manager::impl {
|
class categorizer_manager_private : public categorizer_manager::impl {
|
||||||
public:
|
public:
|
||||||
virtual std::vector<std::shared_ptr<categorizer const>> const&
|
virtual std::vector<std::shared_ptr<categorizer>> const&
|
||||||
categorizers() const = 0;
|
categorizers() const = 0;
|
||||||
virtual fragment_category::value_type
|
virtual fragment_category::value_type
|
||||||
category(std::string_view cat) const = 0;
|
category(std::string_view cat) const = 0;
|
||||||
@ -100,7 +115,7 @@ void categorizer_job_<LoggerPolicy>::categorize_random_access(
|
|||||||
bool global_best = true;
|
bool global_best = true;
|
||||||
|
|
||||||
for (auto&& [index, cat] : folly::enumerate(mgr_.categorizers())) {
|
for (auto&& [index, cat] : folly::enumerate(mgr_.categorizers())) {
|
||||||
if (auto p = dynamic_cast<random_access_categorizer const*>(cat.get())) {
|
if (auto p = dynamic_cast<random_access_categorizer*>(cat.get())) {
|
||||||
if (auto c = p->categorize(path_, data, cat_mapper_)) {
|
if (auto c = p->categorize(path_, data, cat_mapper_)) {
|
||||||
best_ = c;
|
best_ = c;
|
||||||
index_ = index;
|
index_ = index;
|
||||||
@ -126,7 +141,7 @@ void categorizer_job_<LoggerPolicy>::categorize_sequential(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto p = dynamic_cast<sequential_categorizer const*>(cat.get())) {
|
if (auto p = dynamic_cast<sequential_categorizer*>(cat.get())) {
|
||||||
if (auto job = p->job(path_, total_size_, cat_mapper_)) {
|
if (auto job = p->job(path_, total_size_, cat_mapper_)) {
|
||||||
seq_jobs_.emplace_back(index, std::move(job));
|
seq_jobs_.emplace_back(index, std::move(job));
|
||||||
}
|
}
|
||||||
@ -180,7 +195,7 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
|||||||
add_category(DEFAULT_CATEGORY, std::numeric_limits<size_t>::max());
|
add_category(DEFAULT_CATEGORY, std::numeric_limits<size_t>::max());
|
||||||
}
|
}
|
||||||
|
|
||||||
void add(std::shared_ptr<categorizer const> c) override;
|
void add(std::shared_ptr<categorizer> c) override;
|
||||||
categorizer_job job(std::filesystem::path const& path) const override;
|
categorizer_job job(std::filesystem::path const& path) const override;
|
||||||
std::string_view
|
std::string_view
|
||||||
category_name(fragment_category::value_type c) const override;
|
category_name(fragment_category::value_type c) const override;
|
||||||
@ -194,12 +209,12 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
folly::dynamic category_metadata(fragment_category c) const override;
|
std::string category_metadata(fragment_category c) const override;
|
||||||
|
|
||||||
folly::dynamic
|
void set_metadata_requirements(fragment_category::value_type c,
|
||||||
category_metadata_sample(fragment_category::value_type c) const override;
|
std::string req) override;
|
||||||
|
|
||||||
std::vector<std::shared_ptr<categorizer const>> const&
|
std::vector<std::shared_ptr<categorizer>> const&
|
||||||
categorizers() const override {
|
categorizers() const override {
|
||||||
return categorizers_;
|
return categorizers_;
|
||||||
}
|
}
|
||||||
@ -211,8 +226,6 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
folly::dynamic category_metadata_impl(fragment_category c, bool sample) const;
|
|
||||||
|
|
||||||
void add_category(std::string_view cat, size_t categorizer_index) {
|
void add_category(std::string_view cat, size_t categorizer_index) {
|
||||||
if (catmap_.emplace(cat, categories_.size()).second) {
|
if (catmap_.emplace(cat, categories_.size()).second) {
|
||||||
categories_.emplace_back(cat, categorizer_index);
|
categories_.emplace_back(cat, categorizer_index);
|
||||||
@ -223,7 +236,7 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
|||||||
|
|
||||||
logger& lgr_;
|
logger& lgr_;
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
std::vector<std::shared_ptr<categorizer const>> categorizers_;
|
std::vector<std::shared_ptr<categorizer>> categorizers_;
|
||||||
// TODO: category descriptions?
|
// TODO: category descriptions?
|
||||||
std::vector<std::pair<std::string_view, size_t>> categories_;
|
std::vector<std::pair<std::string_view, size_t>> categories_;
|
||||||
std::unordered_map<std::string_view, fragment_category::value_type> catmap_;
|
std::unordered_map<std::string_view, fragment_category::value_type> catmap_;
|
||||||
@ -234,8 +247,7 @@ fragment_category categorizer_manager::default_category() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void categorizer_manager_<LoggerPolicy>::add(
|
void categorizer_manager_<LoggerPolicy>::add(std::shared_ptr<categorizer> c) {
|
||||||
std::shared_ptr<categorizer const> c) {
|
|
||||||
for (auto const& c : c->categories()) {
|
for (auto const& c : c->categories()) {
|
||||||
add_category(c, categorizers_.size());
|
add_category(c, categorizers_.size());
|
||||||
}
|
}
|
||||||
@ -258,34 +270,25 @@ std::string_view categorizer_manager_<LoggerPolicy>::category_name(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
folly::dynamic
|
std::string categorizer_manager_<LoggerPolicy>::category_metadata(
|
||||||
categorizer_manager_<LoggerPolicy>::category_metadata_impl(fragment_category c,
|
fragment_category c) const {
|
||||||
bool sample) const {
|
|
||||||
if (c.value() == 0) {
|
if (c.value() == 0) {
|
||||||
return folly::dynamic();
|
return std::string();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto cat = DWARFS_NOTHROW(categories_.at(c.value()));
|
auto cat = DWARFS_NOTHROW(categories_.at(c.value()));
|
||||||
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
|
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
|
||||||
std::optional<fragment_category> maybe_category;
|
|
||||||
|
|
||||||
if (!sample) {
|
return categorizer->category_metadata(cat.first, c);
|
||||||
maybe_category.emplace(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
return categorizer->category_metadata(cat.first, maybe_category);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
folly::dynamic categorizer_manager_<LoggerPolicy>::category_metadata(
|
void categorizer_manager_<LoggerPolicy>::set_metadata_requirements(
|
||||||
fragment_category c) const {
|
fragment_category::value_type c, std::string req) {
|
||||||
return category_metadata_impl(c, false);
|
auto cat = DWARFS_NOTHROW(categories_.at(c));
|
||||||
}
|
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
categorizer->set_metadata_requirements(cat.first, req);
|
||||||
folly::dynamic categorizer_manager_<LoggerPolicy>::category_metadata_sample(
|
|
||||||
fragment_category::value_type c) const {
|
|
||||||
return category_metadata_impl(fragment_category(c), true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
categorizer_manager::categorizer_manager(logger& lgr)
|
categorizer_manager::categorizer_manager(logger& lgr)
|
||||||
|
@ -64,12 +64,6 @@ class binary_categorizer_ final : public binary_categorizer_base {
|
|||||||
|
|
||||||
bool is_single_fragment() const override { return false; }
|
bool is_single_fragment() const override { return false; }
|
||||||
|
|
||||||
folly::dynamic
|
|
||||||
category_metadata(std::string_view,
|
|
||||||
std::optional<fragment_category>) const override {
|
|
||||||
return folly::dynamic();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
};
|
};
|
||||||
|
@ -166,12 +166,6 @@ class incompressible_categorizer_ final : public sequential_categorizer {
|
|||||||
|
|
||||||
bool is_single_fragment() const override { return true; }
|
bool is_single_fragment() const override { return true; }
|
||||||
|
|
||||||
folly::dynamic
|
|
||||||
category_metadata(std::string_view,
|
|
||||||
std::optional<fragment_category>) const override {
|
|
||||||
return folly::dynamic();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
logger& lgr_;
|
logger& lgr_;
|
||||||
incompressible_categorizer_config const config_;
|
incompressible_categorizer_config const config_;
|
||||||
|
@ -149,12 +149,6 @@ class libmagic_categorizer_ final : public libmagic_categorizer_base {
|
|||||||
|
|
||||||
bool is_single_fragment() const override { return true; }
|
bool is_single_fragment() const override { return true; }
|
||||||
|
|
||||||
folly::dynamic
|
|
||||||
category_metadata(std::string_view,
|
|
||||||
std::optional<fragment_category>) const override {
|
|
||||||
return folly::dynamic();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
magic_wrapper m_;
|
magic_wrapper m_;
|
||||||
|
@ -30,13 +30,17 @@
|
|||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
#include <fmt/ostream.h>
|
||||||
|
|
||||||
#include <folly/Synchronized.h>
|
#include <folly/Synchronized.h>
|
||||||
|
#include <folly/json.h>
|
||||||
#include <folly/lang/Bits.h>
|
#include <folly/lang/Bits.h>
|
||||||
|
|
||||||
#include "dwarfs/categorizer.h"
|
#include "dwarfs/categorizer.h"
|
||||||
|
#include "dwarfs/compression_metadata_requirements.h"
|
||||||
#include "dwarfs/error.h"
|
#include "dwarfs/error.h"
|
||||||
#include "dwarfs/logger.h"
|
#include "dwarfs/logger.h"
|
||||||
|
#include "dwarfs/map_util.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
@ -46,7 +50,7 @@ namespace po = boost::program_options;
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr std::string_view const METADATA_CATEGORY{"pcmaudio/metadata"};
|
constexpr std::string_view const METADATA_CATEGORY{"pcmaudio/metadata"};
|
||||||
constexpr std::string_view const PCMAUDIO_CATEGORY{"pcmaudio/waveform"};
|
constexpr std::string_view const WAVEFORM_CATEGORY{"pcmaudio/waveform"};
|
||||||
|
|
||||||
constexpr size_t const MIN_PCMAUDIO_SIZE{32};
|
constexpr size_t const MIN_PCMAUDIO_SIZE{32};
|
||||||
|
|
||||||
@ -65,33 +69,97 @@ enum class padding : uint8_t {
|
|||||||
MSB,
|
MSB,
|
||||||
};
|
};
|
||||||
|
|
||||||
char const* endianness_string(endianness e) {
|
std::ostream& operator<<(std::ostream& os, endianness e) {
|
||||||
switch (e) {
|
switch (e) {
|
||||||
case endianness::BIG:
|
case endianness::BIG:
|
||||||
return "big";
|
os << "big";
|
||||||
|
break;
|
||||||
case endianness::LITTLE:
|
case endianness::LITTLE:
|
||||||
return "little";
|
os << "little";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw std::runtime_error("internal error: unhandled endianness value");
|
||||||
}
|
}
|
||||||
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
char const* signedness_string(signedness s) {
|
std::optional<endianness> parse_endianness(std::string_view e) {
|
||||||
switch (s) {
|
static std::unordered_map<std::string_view, endianness> const lookup{
|
||||||
|
{"big", endianness::BIG},
|
||||||
|
{"little", endianness::LITTLE},
|
||||||
|
};
|
||||||
|
return get_optional(lookup, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adapter for values coming from a folly::dynamic: converts the value with
// asString() and hands the text to parse_endianness().
std::optional<endianness> parse_endianness_dyn(folly::dynamic const& e) {
  auto const text = e.asString();
  return parse_endianness(text);
}
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, signedness e) {
|
||||||
|
switch (e) {
|
||||||
case signedness::SIGNED:
|
case signedness::SIGNED:
|
||||||
return "signed";
|
os << "signed";
|
||||||
|
break;
|
||||||
case signedness::UNSIGNED:
|
case signedness::UNSIGNED:
|
||||||
return "unsigned";
|
os << "unsigned";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw std::runtime_error("internal error: unhandled signedness value");
|
||||||
}
|
}
|
||||||
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
char const* padding_string(padding p) {
|
std::optional<signedness> parse_signedness(std::string_view s) {
|
||||||
switch (p) {
|
static std::unordered_map<std::string_view, signedness> const lookup{
|
||||||
|
{"signed", signedness::SIGNED},
|
||||||
|
{"unsigned", signedness::UNSIGNED},
|
||||||
|
};
|
||||||
|
return get_optional(lookup, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adapter for values coming from a folly::dynamic: converts the value with
// asString() and hands the text to parse_signedness().
std::optional<signedness> parse_signedness_dyn(folly::dynamic const& s) {
  auto const text = s.asString();
  return parse_signedness(text);
}
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, padding e) {
|
||||||
|
switch (e) {
|
||||||
case padding::LSB:
|
case padding::LSB:
|
||||||
return "lsb";
|
os << "lsb";
|
||||||
|
break;
|
||||||
case padding::MSB:
|
case padding::MSB:
|
||||||
return "msb";
|
os << "msb";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw std::runtime_error("internal error: unhandled padding value");
|
||||||
}
|
}
|
||||||
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<padding> parse_padding(std::string_view p) {
|
||||||
|
static std::unordered_map<std::string_view, padding> const lookup{
|
||||||
|
{"lsb", padding::LSB},
|
||||||
|
{"msb", padding::MSB},
|
||||||
|
};
|
||||||
|
return get_optional(lookup, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adapter for values coming from a folly::dynamic: converts the value with
// asString() and hands the text to parse_padding().
std::optional<padding> parse_padding_dyn(folly::dynamic const& p) {
  auto const text = p.asString();
  return parse_padding(text);
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace dwarfs
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<dwarfs::endianness> : ostream_formatter {};
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<dwarfs::signedness> : ostream_formatter {};
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<dwarfs::padding> : ostream_formatter {};
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
namespace {
|
||||||
|
|
||||||
struct pcmaudio_metadata {
|
struct pcmaudio_metadata {
|
||||||
endianness sample_endianness;
|
endianness sample_endianness;
|
||||||
signedness sample_signedness;
|
signedness sample_signedness;
|
||||||
@ -325,9 +393,8 @@ class iff_parser final {
|
|||||||
};
|
};
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, pcmaudio_metadata const& m) {
|
std::ostream& operator<<(std::ostream& os, pcmaudio_metadata const& m) {
|
||||||
os << "[" << endianness_string(m.sample_endianness) << ", "
|
os << "[" << m.sample_endianness << ", " << m.sample_signedness << ", "
|
||||||
<< signedness_string(m.sample_signedness) << ", "
|
<< m.sample_padding << ", "
|
||||||
<< padding_string(m.sample_padding) << ", "
|
|
||||||
<< "bits=" << static_cast<int>(m.bits_per_sample) << ", "
|
<< "bits=" << static_cast<int>(m.bits_per_sample) << ", "
|
||||||
<< "bytes=" << static_cast<int>(m.bytes_per_sample) << ", "
|
<< "bytes=" << static_cast<int>(m.bytes_per_sample) << ", "
|
||||||
<< "channels=" << static_cast<int>(m.number_of_channels) << "]";
|
<< "channels=" << static_cast<int>(m.number_of_channels) << "]";
|
||||||
@ -349,27 +416,16 @@ class pcmaudio_metadata_store {
|
|||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
folly::dynamic lookup(size_t ix) const {
|
std::string lookup(size_t ix) const {
|
||||||
auto const& m = DWARFS_NOTHROW(forward_index_.at(ix));
|
auto const& m = DWARFS_NOTHROW(forward_index_.at(ix));
|
||||||
folly::dynamic obj = folly::dynamic::object;
|
folly::dynamic obj = folly::dynamic::object;
|
||||||
obj.insert("endianness", endianness_string(m.sample_endianness));
|
obj.insert("endianness", fmt::format("{}", m.sample_endianness));
|
||||||
obj.insert("signedness", signedness_string(m.sample_signedness));
|
obj.insert("signedness", fmt::format("{}", m.sample_signedness));
|
||||||
obj.insert("padding", padding_string(m.sample_padding));
|
obj.insert("padding", fmt::format("{}", m.sample_padding));
|
||||||
obj.insert("bytes_per_sample", m.bytes_per_sample);
|
obj.insert("bytes_per_sample", m.bytes_per_sample);
|
||||||
obj.insert("bits_per_sample", m.bits_per_sample);
|
obj.insert("bits_per_sample", m.bits_per_sample);
|
||||||
obj.insert("number_of_channels", m.number_of_channels);
|
obj.insert("number_of_channels", m.number_of_channels);
|
||||||
return obj;
|
return folly::toJson(obj);
|
||||||
}
|
|
||||||
|
|
||||||
static folly::dynamic sample() {
|
|
||||||
folly::dynamic obj = folly::dynamic::object;
|
|
||||||
obj.insert("endianness", endianness_string(endianness::BIG));
|
|
||||||
obj.insert("signedness", signedness_string(signedness::SIGNED));
|
|
||||||
obj.insert("padding", padding_string(padding::LSB));
|
|
||||||
obj.insert("bytes_per_sample", 2);
|
|
||||||
obj.insert("bits_per_sample", 16);
|
|
||||||
obj.insert("number_of_channels", 2);
|
|
||||||
return obj;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -386,7 +442,20 @@ template <typename LoggerPolicy>
|
|||||||
class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
||||||
public:
|
public:
|
||||||
pcmaudio_categorizer_(logger& lgr)
|
pcmaudio_categorizer_(logger& lgr)
|
||||||
: LOG_PROXY_INIT(lgr) {}
|
: LOG_PROXY_INIT(lgr) {
|
||||||
|
waveform_req_.add_set("endianness", &pcmaudio_metadata::sample_endianness,
|
||||||
|
parse_endianness_dyn);
|
||||||
|
waveform_req_.add_set("signedness", &pcmaudio_metadata::sample_signedness,
|
||||||
|
parse_signedness_dyn);
|
||||||
|
waveform_req_.add_set("padding", &pcmaudio_metadata::sample_padding,
|
||||||
|
parse_padding_dyn);
|
||||||
|
waveform_req_.add_range<int>("bytes_per_sample",
|
||||||
|
&pcmaudio_metadata::bytes_per_sample);
|
||||||
|
waveform_req_.add_range<int>("bits_per_sample",
|
||||||
|
&pcmaudio_metadata::bits_per_sample);
|
||||||
|
waveform_req_.add_range<int>("number_of_channels",
|
||||||
|
&pcmaudio_metadata::number_of_channels);
|
||||||
|
}
|
||||||
|
|
||||||
inode_fragments
|
inode_fragments
|
||||||
categorize(fs::path const& path, std::span<uint8_t const> data,
|
categorize(fs::path const& path, std::span<uint8_t const> data,
|
||||||
@ -394,20 +463,18 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
|||||||
|
|
||||||
bool is_single_fragment() const override { return false; }
|
bool is_single_fragment() const override { return false; }
|
||||||
|
|
||||||
folly::dynamic
|
std::string category_metadata(std::string_view category_name,
|
||||||
category_metadata(std::string_view category_name,
|
fragment_category c) const override {
|
||||||
std::optional<fragment_category> c) const override {
|
if (category_name == WAVEFORM_CATEGORY) {
|
||||||
if (category_name == PCMAUDIO_CATEGORY) {
|
DWARFS_CHECK(c.has_subcategory(),
|
||||||
if (c) {
|
|
||||||
DWARFS_CHECK(c->has_subcategory(),
|
|
||||||
"expected PCMAUDIO to have subcategory");
|
"expected PCMAUDIO to have subcategory");
|
||||||
return meta_.rlock()->lookup(c->subcategory());
|
return meta_.rlock()->lookup(c.subcategory());
|
||||||
} else {
|
|
||||||
return pcmaudio_metadata_store::sample();
|
|
||||||
}
|
}
|
||||||
|
return std::string();
|
||||||
}
|
}
|
||||||
return folly::dynamic();
|
|
||||||
}
|
void set_metadata_requirements(std::string_view category_name,
|
||||||
|
std::string requirements) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool check_aiff(inode_fragments& frag, fs::path const& path,
|
bool check_aiff(inode_fragments& frag, fs::path const& path,
|
||||||
@ -428,15 +495,20 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
|||||||
std::span<uint8_t const> data,
|
std::span<uint8_t const> data,
|
||||||
category_mapper const& mapper) const;
|
category_mapper const& mapper) const;
|
||||||
|
|
||||||
|
bool check_metadata_requirements(pcmaudio_metadata const& meta,
|
||||||
|
std::string_view context,
|
||||||
|
fs::path const& path) const;
|
||||||
|
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
folly::Synchronized<pcmaudio_metadata_store> mutable meta_;
|
folly::Synchronized<pcmaudio_metadata_store> mutable meta_;
|
||||||
|
compression_metadata_requirements<pcmaudio_metadata> waveform_req_;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::span<std::string_view const>
|
std::span<std::string_view const>
|
||||||
pcmaudio_categorizer_base::categories() const {
|
pcmaudio_categorizer_base::categories() const {
|
||||||
static constexpr std::array const s_categories{
|
static constexpr std::array const s_categories{
|
||||||
METADATA_CATEGORY,
|
METADATA_CATEGORY,
|
||||||
PCMAUDIO_CATEGORY,
|
WAVEFORM_CATEGORY,
|
||||||
};
|
};
|
||||||
return s_categories;
|
return s_categories;
|
||||||
}
|
}
|
||||||
@ -517,6 +589,10 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_aiff(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!check_metadata_requirements(meta, "AIFF", path)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
meta_valid = true;
|
meta_valid = true;
|
||||||
|
|
||||||
LOG_TRACE << "[AIFF] " << path << ": meta=" << meta;
|
LOG_TRACE << "[AIFF] " << path << ": meta=" << meta;
|
||||||
@ -553,7 +629,7 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_aiff(
|
|||||||
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||||
pcm_start);
|
pcm_start);
|
||||||
frag.emplace_back(
|
frag.emplace_back(
|
||||||
fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory),
|
fragment_category(mapper(WAVEFORM_CATEGORY), subcategory),
|
||||||
pcm_length);
|
pcm_length);
|
||||||
|
|
||||||
if (pcm_start + pcm_length < data.size()) {
|
if (pcm_start + pcm_length < data.size()) {
|
||||||
@ -710,6 +786,10 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_caf(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!check_metadata_requirements(meta, "CAF", path)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
meta_valid = true;
|
meta_valid = true;
|
||||||
|
|
||||||
LOG_TRACE << "[CAF] " << path << ": meta=" << meta;
|
LOG_TRACE << "[CAF] " << path << ": meta=" << meta;
|
||||||
@ -736,7 +816,7 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_caf(
|
|||||||
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||||
pcm_start);
|
pcm_start);
|
||||||
frag.emplace_back(
|
frag.emplace_back(
|
||||||
fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory),
|
fragment_category(mapper(WAVEFORM_CATEGORY), subcategory),
|
||||||
pcm_length);
|
pcm_length);
|
||||||
|
|
||||||
if (pcm_start + pcm_length < data.size()) {
|
if (pcm_start + pcm_length < data.size()) {
|
||||||
@ -885,6 +965,10 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_wav_like(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!check_metadata_requirements(meta, FormatPolicy::format_name, path)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
meta_valid = true;
|
meta_valid = true;
|
||||||
|
|
||||||
LOG_TRACE << "[" << FormatPolicy::format_name << "] " << path
|
LOG_TRACE << "[" << FormatPolicy::format_name << "] " << path
|
||||||
@ -912,7 +996,7 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_wav_like(
|
|||||||
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
frag.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||||
pcm_start);
|
pcm_start);
|
||||||
frag.emplace_back(
|
frag.emplace_back(
|
||||||
fragment_category(mapper(PCMAUDIO_CATEGORY), subcategory),
|
fragment_category(mapper(WAVEFORM_CATEGORY), subcategory),
|
||||||
pcm_length);
|
pcm_length);
|
||||||
|
|
||||||
if (pcm_start + pcm_length < data.size()) {
|
if (pcm_start + pcm_length < data.size()) {
|
||||||
@ -927,6 +1011,20 @@ bool pcmaudio_categorizer_<LoggerPolicy>::check_wav_like(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
bool pcmaudio_categorizer_<LoggerPolicy>::check_metadata_requirements(
|
||||||
|
pcmaudio_metadata const& meta, std::string_view context,
|
||||||
|
fs::path const& path) const {
|
||||||
|
try {
|
||||||
|
waveform_req_.check(meta);
|
||||||
|
} catch (std::exception const& e) {
|
||||||
|
LOG_WARN << "[" << context << "] " << path << ": " << e.what();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
||||||
fs::path const& path, std::span<uint8_t const> data,
|
fs::path const& path, std::span<uint8_t const> data,
|
||||||
@ -954,6 +1052,19 @@ inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
|||||||
return fragments;
|
return fragments;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void pcmaudio_categorizer_<LoggerPolicy>::set_metadata_requirements(
|
||||||
|
std::string_view category_name, std::string requirements) {
|
||||||
|
if (!requirements.empty()) {
|
||||||
|
auto req = folly::parseJson(requirements);
|
||||||
|
if (category_name == WAVEFORM_CATEGORY) {
|
||||||
|
waveform_req_.parse(req);
|
||||||
|
} else {
|
||||||
|
compression_metadata_requirements().parse(req);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class pcmaudio_categorizer_factory : public categorizer_factory {
|
class pcmaudio_categorizer_factory : public categorizer_factory {
|
||||||
public:
|
public:
|
||||||
std::string_view name() const override { return "pcmaudio"; }
|
std::string_view name() const override { return "pcmaudio"; }
|
||||||
|
@ -21,6 +21,8 @@
|
|||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
#include <folly/String.h>
|
||||||
|
|
||||||
#include "dwarfs/categorizer.h"
|
#include "dwarfs/categorizer.h"
|
||||||
#include "dwarfs/category_parser.h"
|
#include "dwarfs/category_parser.h"
|
||||||
|
|
||||||
|
@ -49,8 +49,9 @@ class brotli_block_compressor final : public block_compressor::impl {
|
|||||||
return std::make_unique<brotli_block_compressor>(*this);
|
return std::make_unique<brotli_block_compressor>(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
std::vector<uint8_t>
|
||||||
folly::dynamic /*meta*/) const override {
|
compress(const std::vector<uint8_t>& data,
|
||||||
|
std::string const* /*metadata*/) const override {
|
||||||
std::vector<uint8_t> compressed;
|
std::vector<uint8_t> compressed;
|
||||||
compressed.resize(folly::kMaxVarintLength64 +
|
compressed.resize(folly::kMaxVarintLength64 +
|
||||||
::BrotliEncoderMaxCompressedSize(data.size()));
|
::BrotliEncoderMaxCompressedSize(data.size()));
|
||||||
@ -69,9 +70,9 @@ class brotli_block_compressor final : public block_compressor::impl {
|
|||||||
return compressed;
|
return compressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
std::string const* metadata) const override {
|
||||||
return compress(data, std::move(meta));
|
return compress(data, metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
compression_type type() const override { return compression_type::BROTLI; }
|
compression_type type() const override { return compression_type::BROTLI; }
|
||||||
@ -80,7 +81,7 @@ class brotli_block_compressor final : public block_compressor::impl {
|
|||||||
return fmt::format("brotli [quality={}, lgwin={}]", quality_, window_bits_);
|
return fmt::format("brotli [quality={}, lgwin={}]", quality_, window_bits_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
std::string metadata_requirements() const override { return std::string(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32_t const quality_;
|
uint32_t const quality_;
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include <folly/Varint.h>
|
#include <folly/Varint.h>
|
||||||
|
#include <folly/json.h>
|
||||||
|
|
||||||
#include "dwarfs/block_compressor.h"
|
#include "dwarfs/block_compressor.h"
|
||||||
#include "dwarfs/compression.h"
|
#include "dwarfs/compression.h"
|
||||||
@ -204,7 +205,14 @@ class flac_block_compressor final : public block_compressor::impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||||
folly::dynamic meta) const override {
|
std::string const* metadata) const override {
|
||||||
|
if (!metadata) {
|
||||||
|
DWARFS_THROW(runtime_error,
|
||||||
|
"internal error: flac compression requires metadata");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto meta = folly::parseJson(*metadata);
|
||||||
|
|
||||||
auto endianness = meta["endianness"].asString();
|
auto endianness = meta["endianness"].asString();
|
||||||
auto signedness = meta["signedness"].asString();
|
auto signedness = meta["signedness"].asString();
|
||||||
auto padding = meta["padding"].asString();
|
auto padding = meta["padding"].asString();
|
||||||
@ -332,9 +340,9 @@ class flac_block_compressor final : public block_compressor::impl {
|
|||||||
return compressed;
|
return compressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
std::string const* metadata) const override {
|
||||||
return compress(data, std::move(meta));
|
return compress(data, metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
compression_type type() const override { return compression_type::FLAC; }
|
compression_type type() const override { return compression_type::FLAC; }
|
||||||
@ -344,15 +352,20 @@ class flac_block_compressor final : public block_compressor::impl {
|
|||||||
exhaustive_ ? ", exhaustive" : "");
|
exhaustive_ ? ", exhaustive" : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic meta) const override {
|
std::string metadata_requirements() const override {
|
||||||
if (meta.empty()) {
|
folly::dynamic req = folly::dynamic::object
|
||||||
return false;
|
// clang-format off
|
||||||
}
|
("endianness", folly::dynamic::array("set",
|
||||||
|
folly::dynamic::array("big", "little")))
|
||||||
return meta.count("endianness") > 0 && meta.count("signedness") > 0 &&
|
("signedness", folly::dynamic::array("set",
|
||||||
meta.count("padding") > 0 && meta.count("bytes_per_sample") > 0 &&
|
folly::dynamic::array("signed", "unsigned")))
|
||||||
meta.count("bits_per_sample") > 0 &&
|
("padding", folly::dynamic::array("set",
|
||||||
meta.count("number_of_channels") > 0;
|
folly::dynamic::array("msb", "lsb")))
|
||||||
|
("bytes_per_sample", folly::dynamic::array("range", 1, 4))
|
||||||
|
("bits_per_sample", folly::dynamic::array("range", 8, 32))
|
||||||
|
("number_of_channels", folly::dynamic::array("range", 1, 8))
|
||||||
|
; // clang-format on
|
||||||
|
return folly::toJson(req);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -66,8 +66,9 @@ class lz4_block_compressor final : public block_compressor::impl {
|
|||||||
return std::make_unique<lz4_block_compressor>(*this);
|
return std::make_unique<lz4_block_compressor>(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
std::vector<uint8_t>
|
||||||
folly::dynamic /*meta*/) const override {
|
compress(const std::vector<uint8_t>& data,
|
||||||
|
std::string const* /*metadata*/) const override {
|
||||||
std::vector<uint8_t> compressed(
|
std::vector<uint8_t> compressed(
|
||||||
sizeof(uint32_t) + LZ4_compressBound(folly::to<int>(data.size())));
|
sizeof(uint32_t) + LZ4_compressBound(folly::to<int>(data.size())));
|
||||||
*reinterpret_cast<uint32_t*>(&compressed[0]) = data.size();
|
*reinterpret_cast<uint32_t*>(&compressed[0]) = data.size();
|
||||||
@ -84,16 +85,16 @@ class lz4_block_compressor final : public block_compressor::impl {
|
|||||||
return compressed;
|
return compressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
std::string const* metadata) const override {
|
||||||
return compress(data, std::move(meta));
|
return compress(data, metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
compression_type type() const override { return compression_type::LZ4; }
|
compression_type type() const override { return compression_type::LZ4; }
|
||||||
|
|
||||||
std::string describe() const override { return Policy::describe(level_); }
|
std::string describe() const override { return Policy::describe(level_); }
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
std::string metadata_requirements() const override { return std::string(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const int level_;
|
const int level_;
|
||||||
|
@ -64,17 +64,17 @@ class lzma_block_compressor final : public block_compressor::impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||||
folly::dynamic meta) const override;
|
std::string const* metadata) const override;
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
std::string const* metadata) const override {
|
||||||
return compress(data, std::move(meta));
|
return compress(data, metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
compression_type type() const override { return compression_type::LZMA; }
|
compression_type type() const override { return compression_type::LZMA; }
|
||||||
|
|
||||||
std::string describe() const override { return description_; }
|
std::string describe() const override { return description_; }
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
std::string metadata_requirements() const override { return std::string(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t>
|
||||||
@ -178,7 +178,7 @@ lzma_block_compressor::compress(const std::vector<uint8_t>& data,
|
|||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t>
|
||||||
lzma_block_compressor::compress(const std::vector<uint8_t>& data,
|
lzma_block_compressor::compress(const std::vector<uint8_t>& data,
|
||||||
folly::dynamic /*meta*/) const {
|
std::string const* /*metadata*/) const {
|
||||||
std::vector<uint8_t> best = compress(data, &filters_[1]);
|
std::vector<uint8_t> best = compress(data, &filters_[1]);
|
||||||
|
|
||||||
if (filters_[0].id != LZMA_VLI_UNKNOWN) {
|
if (filters_[0].id != LZMA_VLI_UNKNOWN) {
|
||||||
|
@ -37,13 +37,15 @@ class null_block_compressor final : public block_compressor::impl {
|
|||||||
return std::make_unique<null_block_compressor>(*this);
|
return std::make_unique<null_block_compressor>(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
std::vector<uint8_t>
|
||||||
folly::dynamic /*meta*/) const override {
|
compress(const std::vector<uint8_t>& data,
|
||||||
|
std::string const* /*metadata*/) const override {
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
std::vector<uint8_t>
|
||||||
folly::dynamic /*meta*/) const override {
|
compress(std::vector<uint8_t>&& data,
|
||||||
|
std::string const* /*metadata*/) const override {
|
||||||
return std::move(data);
|
return std::move(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,7 +53,7 @@ class null_block_compressor final : public block_compressor::impl {
|
|||||||
|
|
||||||
std::string describe() const override { return "null"; }
|
std::string describe() const override { return "null"; }
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
std::string metadata_requirements() const override { return std::string(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
class null_block_decompressor final : public block_decompressor::impl {
|
class null_block_decompressor final : public block_decompressor::impl {
|
||||||
|
@ -55,11 +55,11 @@ class zstd_block_compressor final : public block_compressor::impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||||
folly::dynamic meta) const override;
|
std::string const* metadata) const override;
|
||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t> compress(std::vector<uint8_t>&& data,
|
||||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
std::string const* metadata) const override {
|
||||||
return compress(data, std::move(meta));
|
return compress(data, std::move(metadata));
|
||||||
}
|
}
|
||||||
|
|
||||||
compression_type type() const override { return compression_type::ZSTD; }
|
compression_type type() const override { return compression_type::ZSTD; }
|
||||||
@ -68,7 +68,7 @@ class zstd_block_compressor final : public block_compressor::impl {
|
|||||||
return fmt::format("zstd [level={}]", level_);
|
return fmt::format("zstd [level={}]", level_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_metadata(folly::dynamic /*meta*/) const override { return true; }
|
std::string metadata_requirements() const override { return std::string(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class scoped_context;
|
class scoped_context;
|
||||||
@ -147,7 +147,7 @@ std::weak_ptr<zstd_block_compressor::context_manager>
|
|||||||
|
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t>
|
||||||
zstd_block_compressor::compress(const std::vector<uint8_t>& data,
|
zstd_block_compressor::compress(const std::vector<uint8_t>& data,
|
||||||
folly::dynamic /*meta*/) const {
|
std::string const* /*metadata*/) const {
|
||||||
std::vector<uint8_t> compressed(ZSTD_compressBound(data.size()));
|
std::vector<uint8_t> compressed(ZSTD_compressBound(data.size()));
|
||||||
scoped_context ctx(*ctxmgr_);
|
scoped_context ctx(*ctxmgr_);
|
||||||
auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(),
|
auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(),
|
||||||
|
63
src/dwarfs/compression_metadata_requirements.cpp
Normal file
63
src/dwarfs/compression_metadata_requirements.cpp
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "dwarfs/compression_metadata_requirements.h"
|
||||||
|
|
||||||
|
namespace dwarfs::detail {
|
||||||
|
|
||||||
|
// Validates the general shape of a single requirement value.
//
// `dyn` must be a non-empty array whose first element, converted with
// asString(), equals `expected_type` and whose total length equals
// `expected_size`. The checks run in that order and the first failing one
// throws std::runtime_error with a message naming the requirement `name`.
void check_dynamic_common(folly::dynamic const& dyn,
                          std::string_view expected_type, size_t expected_size,
                          std::string_view name) {
  if (!dyn.isArray()) {
    throw std::runtime_error(
        fmt::format("found non-array type for requirement '{}'", name));
  }

  if (dyn.empty()) {
    throw std::runtime_error(
        fmt::format("unexpected empty value for requirement '{}'", name));
  }

  // The first array element is the requirement's type tag.
  auto const actual_type = dyn[0].asString();
  if (actual_type != expected_type) {
    throw std::runtime_error(
        fmt::format("invalid type '{}' for requirement '{}', expected '{}'",
                    actual_type, name, expected_type));
  }

  auto const actual_size = dyn.size();
  if (actual_size != expected_size) {
    throw std::runtime_error(
        fmt::format("unexpected size '{}' for requirement '{}', expected {}",
                    actual_size, name, expected_size));
  }
}
|
||||||
|
|
||||||
|
// Rejects any requirement keys still present in `req`.
//
// Does nothing when `req` is empty. Otherwise collects every key as a string,
// sorts the keys so the message is deterministic, and throws
// std::runtime_error listing them.
void check_unsupported_metadata_requirements(folly::dynamic& req) {
  if (req.empty()) {
    return;
  }

  std::vector<std::string> keys;
  keys.reserve(req.size());

  // Bind by const reference: copying a folly::dynamic key just to read its
  // string value is a needless deep copy.
  for (auto const& k : req.keys()) {
    keys.emplace_back(k.asString());
  }

  std::sort(keys.begin(), keys.end());

  throw std::runtime_error(fmt::format(
      "unsupported metadata requirements: {}", folly::join(", ", keys)));
}
|
||||||
|
|
||||||
|
} // namespace dwarfs::detail
|
@ -102,8 +102,7 @@ class raw_fsblock : public fsblock::impl {
|
|||||||
wg.add_job([this, prom = std::move(prom)]() mutable {
|
wg.add_job([this, prom = std::move(prom)]() mutable {
|
||||||
try {
|
try {
|
||||||
// TODO: metadata
|
// TODO: metadata
|
||||||
auto tmp = std::make_shared<block_data>(
|
auto tmp = std::make_shared<block_data>(bc_.compress(data_->vec()));
|
||||||
bc_.compress(data_->vec(), folly::dynamic()));
|
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard lock(mx_);
|
std::lock_guard lock(mx_);
|
||||||
|
@ -1025,11 +1025,13 @@ int mkdwarfs_main(int argc, sys_char** argv) {
|
|||||||
|
|
||||||
compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr](
|
compression_opt.visit_contextual([catmgr = options.inode.categorizer_mgr](
|
||||||
auto cat, block_compressor const& bc) {
|
auto cat, block_compressor const& bc) {
|
||||||
if (!bc.check_metadata(catmgr->category_metadata_sample(cat))) {
|
try {
|
||||||
|
catmgr->set_metadata_requirements(cat, bc.metadata_requirements());
|
||||||
|
} catch (std::exception const& e) {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
fmt::format("compression '{}' cannot be used for category '{}': "
|
fmt::format("compression '{}' cannot be used for category '{}': "
|
||||||
"insufficient metadata",
|
"metadata requirements not met ({})",
|
||||||
bc.describe(), catmgr->category_name(cat)));
|
bc.describe(), catmgr->category_name(cat), e.what()));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} catch (std::exception const& e) {
|
} catch (std::exception const& e) {
|
||||||
|
@ -24,6 +24,8 @@
|
|||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <folly/json.h>
|
||||||
|
|
||||||
#include "dwarfs/block_compressor.h"
|
#include "dwarfs/block_compressor.h"
|
||||||
#include "dwarfs/pcm_sample_transformer.h"
|
#include "dwarfs/pcm_sample_transformer.h"
|
||||||
|
|
||||||
@ -148,7 +150,7 @@ TEST(flac_compressor, basic) {
|
|||||||
|
|
||||||
block_compressor comp("flac");
|
block_compressor comp("flac");
|
||||||
|
|
||||||
auto compressed = comp.compress(data, std::move(meta));
|
auto compressed = comp.compress(data, folly::toJson(meta));
|
||||||
|
|
||||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||||
|
|
||||||
@ -181,7 +183,7 @@ TEST_P(flac_param, combinations) {
|
|||||||
|
|
||||||
block_compressor comp("flac");
|
block_compressor comp("flac");
|
||||||
|
|
||||||
auto compressed = comp.compress(data, std::move(meta));
|
auto compressed = comp.compress(data, folly::toJson(meta));
|
||||||
|
|
||||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user