mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-12 13:59:46 -04:00
Add incompressible categorizer test
This commit is contained in:
parent
35ae53b3fe
commit
5d191a6dbf
@ -571,6 +571,8 @@ if(WITH_TESTS)
|
||||
test/pcm_sample_transformer_test.cpp)
|
||||
add_executable(dwarfs_pcmaudio_categorizer_test
|
||||
test/pcmaudio_categorizer_test.cpp)
|
||||
add_executable(dwarfs_incompressible_categorizer_test
|
||||
test/incompressible_categorizer_test.cpp)
|
||||
|
||||
target_link_libraries(dwarfs_test test_helpers gtest gtest_main)
|
||||
target_link_libraries(dwarfs_compat_test gtest gtest_main)
|
||||
@ -580,10 +582,13 @@ if(WITH_TESTS)
|
||||
target_link_libraries(dwarfs_pcm_sample_transformer_test gtest gtest_main)
|
||||
target_link_libraries(dwarfs_pcmaudio_categorizer_test gtest gtest_main gmock_main
|
||||
"$<LINK_LIBRARY:WHOLE_ARCHIVE,dwarfs_categorizer>")
|
||||
target_link_libraries(dwarfs_incompressible_categorizer_test test_helpers
|
||||
gtest gtest_main gmock_main
|
||||
"$<LINK_LIBRARY:WHOLE_ARCHIVE,dwarfs_categorizer>")
|
||||
|
||||
list(APPEND BINARY_TARGETS dwarfs_test dwarfs_compat_test dwarfs_badfs_test
|
||||
dwarfs_tools_test dwarfs_utils_test dwarfs_pcm_sample_transformer_test
|
||||
dwarfs_pcmaudio_categorizer_test)
|
||||
dwarfs_pcmaudio_categorizer_test dwarfs_incompressible_categorizer_test)
|
||||
|
||||
gtest_discover_tests(dwarfs_test DISCOVERY_TIMEOUT 120)
|
||||
gtest_discover_tests(dwarfs_compat_test DISCOVERY_TIMEOUT 120)
|
||||
@ -592,6 +597,7 @@ if(WITH_TESTS)
|
||||
gtest_discover_tests(dwarfs_utils_test DISCOVERY_TIMEOUT 120)
|
||||
gtest_discover_tests(dwarfs_pcm_sample_transformer_test DISCOVERY_TIMEOUT 120)
|
||||
gtest_discover_tests(dwarfs_pcmaudio_categorizer_test DISCOVERY_TIMEOUT 120)
|
||||
gtest_discover_tests(dwarfs_incompressible_categorizer_test DISCOVERY_TIMEOUT 120)
|
||||
|
||||
if(FLAC_FOUND)
|
||||
add_executable(dwarfs_flac_compressor_test test/flac_compressor_test.cpp)
|
||||
|
245
test/incompressible_categorizer_test.cpp
Normal file
245
test/incompressible_categorizer_test.cpp
Normal file
@ -0,0 +1,245 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <filesystem>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
// #include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <folly/String.h>
|
||||
|
||||
#include "dwarfs/categorizer.h"
|
||||
#include "dwarfs/mmap.h"
|
||||
|
||||
#include "loremipsum.h"
|
||||
#include "test_logger.h"
|
||||
|
||||
using namespace dwarfs;
|
||||
using dwarfs::test::loremipsum;
|
||||
// using testing::MatchesRegex;
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
namespace po = boost::program_options;
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Build a string of `size` pseudo-random bytes.
 *
 * The engine is a default-constructed function-local static, so successive
 * calls continue one shared byte stream instead of restarting it.
 *
 * \param size  number of bytes to produce
 * \return      a string of exactly `size` bytes
 */
std::string random_string(size_t size) {
  using byte_engine =
      std::independent_bits_engine<std::default_random_engine, CHAR_BIT,
                                   unsigned short>;

  static byte_engine engine;

  std::string bytes(size, '\0');

  // Fill front to back, drawing one engine value per output byte.
  for (auto& ch : bytes) {
    ch = static_cast<char>(engine());
  }

  return bytes;
}
|
||||
|
||||
/**
 * Copy the bytes of a string into a byte vector.
 *
 * Uses the vector's iterator-range constructor rather than `memcpy`, so an
 * empty input never passes a possibly-null `data()` pointer to `memcpy`.
 *
 * \param s  source bytes; may be empty and may contain embedded NULs
 * \return   a vector holding the same bytes, in the same order
 */
std::vector<uint8_t> make_data(std::string const& s) {
  return std::vector<uint8_t>(s.begin(), s.end());
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename Base>
|
||||
class incompressible_categorizer_fixture : public Base {
|
||||
protected:
|
||||
void SetUp() override { lgr.clear(); }
|
||||
|
||||
void create_catmgr() { create_catmgr({}); }
|
||||
|
||||
void create_catmgr(std::vector<char const*> args) {
|
||||
auto& catreg = categorizer_registry::instance();
|
||||
|
||||
po::options_description opts;
|
||||
catreg.add_options(opts);
|
||||
|
||||
args.insert(args.begin(), "program");
|
||||
|
||||
po::variables_map vm;
|
||||
auto parsed = po::parse_command_line(args.size(), args.data(), opts);
|
||||
|
||||
po::store(parsed, vm);
|
||||
po::notify(vm);
|
||||
|
||||
catmgr = std::make_shared<categorizer_manager>(lgr);
|
||||
|
||||
catmgr->add(catreg.create(lgr, "incompressible", vm));
|
||||
}
|
||||
|
||||
// void TearDown() override {
|
||||
// }
|
||||
|
||||
public:
|
||||
auto categorize(fs::path const& path, std::span<uint8_t const> data) {
|
||||
auto job = catmgr->job(path);
|
||||
job.set_total_size(data.size());
|
||||
job.categorize_random_access(data);
|
||||
job.categorize_sequential(data);
|
||||
return job.result();
|
||||
}
|
||||
|
||||
std::shared_ptr<categorizer_manager> catmgr;
|
||||
test::test_logger lgr{logger::INFO};
|
||||
};
|
||||
|
||||
using incompressible_categorizer =
|
||||
incompressible_categorizer_fixture<::testing::Test>;
|
||||
|
||||
/**
 * Unknown metadata requirements must be rejected with a std::runtime_error
 * naming the offending key; an empty requirements object must be accepted.
 */
TEST_F(incompressible_categorizer, requirements) {
  create_catmgr();

  // Applies the given JSON requirements to the "incompressible" category.
  auto const set_requirements = [this](char const* json) {
    catmgr->set_metadata_requirements(
        catmgr->category_value("incompressible").value(), json);
  };

  try {
    set_requirements(R"({"foo": ["set", ["bar"]]})");
    FAIL() << "expected std::runtime_error";
  } catch (std::runtime_error const& err) {
    EXPECT_STREQ("unsupported metadata requirements: foo", err.what());
  } catch (...) {
    FAIL() << "unexpected exception: "
           << folly::exceptionStr(std::current_exception());
  }

  // No assertion here: the call simply must not throw.
  set_requirements(R"({})");
}
|
||||
|
||||
/**
 * With default options, 10'000 pseudo-random bytes must come back as a
 * single fragment in the "incompressible" category.
 */
TEST_F(incompressible_categorizer, categorize_incompressible) {
  create_catmgr();

  auto const input = make_data(random_string(10'000));
  auto fragments = categorize("random.txt", input);

  ASSERT_EQ(1, fragments.size());

  auto const category = fragments.get_single_category().value();
  EXPECT_EQ("incompressible", catmgr->category_name(category));
}
|
||||
|
||||
/**
 * With default options, 10'000 bytes of lorem ipsum text must not be
 * assigned any fragment by the categorizer.
 */
TEST_F(incompressible_categorizer, categorize_default) {
  create_catmgr();

  auto const input = make_data(loremipsum(10'000));
  auto fragments = categorize("ipsum.txt", input);

  EXPECT_TRUE(fragments.empty());
}
|
||||
|
||||
/**
 * With fragment mode enabled and an 8k block size, input that alternates
 * between text and pseudo-random data must be split into alternating
 * "<default>" and "incompressible" fragments of the expected lengths.
 */
TEST_F(incompressible_categorizer, categorize_fragments) {
  create_catmgr(
      {"--incompressible-block-size=8k", "--incompressible-fragments"});

  // data:  CCCCCCCCCCCCIIIIIIIIIIIICCCCCCCCCCCCIIIIIIIIIIIICCC
  // block: 0-------1-------2-------3-------4-------5-------6--
  // frag:  def-------------incomp--def-------------incomp--def
  auto const input =
      make_data(loremipsum(12 * 1024) + random_string(12 * 1024) +
                loremipsum(12 * 1024) + random_string(12 * 1024) +
                loremipsum(3 * 1024));

  auto fragments = categorize("mixed.txt", input);
  ASSERT_EQ(5, fragments.size());

  // Expected (category name, length in bytes) for each fragment, in order.
  std::vector<std::pair<std::string_view, size_t>> const expected{
      {"<default>", 16384},     {"incompressible", 8192},
      {"<default>", 16384},     {"incompressible", 8192},
      {"<default>", 3072},
  };

  for (size_t i = 0; i < expected.size(); ++i) {
    auto const& [name, length] = expected[i];
    auto const& actual = fragments.span()[i];

    EXPECT_EQ(name, catmgr->category_name(actual.category().value())) << i;
    EXPECT_EQ(length, actual.length()) << i;
  }
}
|
||||
|
||||
/**
 * With `--incompressible-min-input-size=1000`, a 999 byte pseudo-random
 * input must get no fragment, while a 10'000 byte one must still be
 * categorized as "incompressible".
 */
TEST_F(incompressible_categorizer, min_input_size) {
  create_catmgr({"--incompressible-min-input-size=1000"});

  {
    // One byte below the configured minimum.
    auto const small_input = make_data(random_string(999));
    auto fragments = categorize("random.txt", small_input);

    EXPECT_TRUE(fragments.empty());
  }

  {
    // Well above the configured minimum.
    auto const large_input = make_data(random_string(10'000));
    auto fragments = categorize("random.txt", large_input);

    ASSERT_EQ(1, fragments.size());

    auto const category = fragments.get_single_category().value();
    EXPECT_EQ("incompressible", catmgr->category_name(category));
  }
}
|
||||
|
||||
using max_ratio_test = incompressible_categorizer_fixture<
|
||||
::testing::TestWithParam<std::pair<double, bool>>>;
|
||||
|
||||
TEST_P(max_ratio_test, max_ratio) {
|
||||
auto [ratio, is_incompressible] = GetParam();
|
||||
auto arg = fmt::format("--incompressible-ratio={:f}", ratio);
|
||||
|
||||
create_catmgr({arg.c_str()});
|
||||
|
||||
auto data = make_data(loremipsum(10'000));
|
||||
auto frag = categorize("ipsum.txt", data);
|
||||
if (is_incompressible) {
|
||||
ASSERT_EQ(1, frag.size());
|
||||
EXPECT_EQ("incompressible",
|
||||
catmgr->category_name(frag.get_single_category().value()));
|
||||
} else {
|
||||
EXPECT_TRUE(frag.empty());
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(incompressible_categorizer, max_ratio_test,
|
||||
::testing::Values(std::make_pair(0.4, true),
|
||||
std::make_pair(0.6, false)));
|
||||
|
||||
using lz4_accel_test = incompressible_categorizer_fixture<
|
||||
::testing::TestWithParam<std::pair<int, bool>>>;
|
||||
|
||||
TEST_P(lz4_accel_test, lz4_acceleration) {
|
||||
auto [accel, is_incompressible] = GetParam();
|
||||
auto arg = fmt::format("--incompressible-lz4-acceleration={}", accel);
|
||||
|
||||
create_catmgr({arg.c_str()});
|
||||
|
||||
auto data = make_data(loremipsum(10'000));
|
||||
auto frag = categorize("ipsum.txt", data);
|
||||
if (is_incompressible) {
|
||||
ASSERT_EQ(1, frag.size());
|
||||
EXPECT_EQ("incompressible",
|
||||
catmgr->category_name(frag.get_single_category().value()));
|
||||
} else {
|
||||
EXPECT_TRUE(frag.empty());
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(incompressible_categorizer, lz4_accel_test,
|
||||
::testing::Values(std::make_pair(1, false),
|
||||
std::make_pair(10, false),
|
||||
std::make_pair(100, true)));
|
Loading…
x
Reference in New Issue
Block a user