From 5d191a6dbfd0011fcd10878f43a16306684bb6a9 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Thu, 3 Aug 2023 17:53:21 +0200 Subject: [PATCH] Add incompressible categorizer test --- CMakeLists.txt | 8 +- test/incompressible_categorizer_test.cpp | 245 +++++++++++++++++++++++ 2 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 test/incompressible_categorizer_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8adf47d3..d93c3401 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -571,6 +571,8 @@ if(WITH_TESTS) test/pcm_sample_transformer_test.cpp) add_executable(dwarfs_pcmaudio_categorizer_test test/pcmaudio_categorizer_test.cpp) + add_executable(dwarfs_incompressible_categorizer_test + test/incompressible_categorizer_test.cpp) target_link_libraries(dwarfs_test test_helpers gtest gtest_main) target_link_libraries(dwarfs_compat_test gtest gtest_main) @@ -580,10 +582,13 @@ if(WITH_TESTS) target_link_libraries(dwarfs_pcm_sample_transformer_test gtest gtest_main) target_link_libraries(dwarfs_pcmaudio_categorizer_test gtest gtest_main gmock_main "$") + target_link_libraries(dwarfs_incompressible_categorizer_test test_helpers + gtest gtest_main gmock_main + "$") list(APPEND BINARY_TARGETS dwarfs_test dwarfs_compat_test dwarfs_badfs_test dwarfs_tools_test dwarfs_utils_test dwarfs_pcm_sample_transformer_test - dwarfs_pcmaudio_categorizer_test) + dwarfs_pcmaudio_categorizer_test dwarfs_incompressible_categorizer_test) gtest_discover_tests(dwarfs_test DISCOVERY_TIMEOUT 120) gtest_discover_tests(dwarfs_compat_test DISCOVERY_TIMEOUT 120) @@ -592,6 +597,7 @@ if(WITH_TESTS) gtest_discover_tests(dwarfs_utils_test DISCOVERY_TIMEOUT 120) gtest_discover_tests(dwarfs_pcm_sample_transformer_test DISCOVERY_TIMEOUT 120) gtest_discover_tests(dwarfs_pcmaudio_categorizer_test DISCOVERY_TIMEOUT 120) + gtest_discover_tests(dwarfs_incompressible_categorizer_test DISCOVERY_TIMEOUT 120) if(FLAC_FOUND) add_executable(dwarfs_flac_compressor_test 
test/flac_compressor_test.cpp) diff --git a/test/incompressible_categorizer_test.cpp b/test/incompressible_categorizer_test.cpp new file mode 100644 index 00000000..2653fa50 --- /dev/null +++ b/test/incompressible_categorizer_test.cpp @@ -0,0 +1,245 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */ + +#include +#include +#include +#include +#include + +// #include +#include + +#include + +#include + +#include "dwarfs/categorizer.h" +#include "dwarfs/mmap.h" + +#include "loremipsum.h" +#include "test_logger.h" + +using namespace dwarfs; +using dwarfs::test::loremipsum; +// using testing::MatchesRegex; + +namespace fs = std::filesystem; +namespace po = boost::program_options; + +namespace { +/* Returns a string of `size` chars drawn from one function-static engine. */ +std::string random_string(size_t size) { + using random_bytes_engine = + std::independent_bits_engine; + + static random_bytes_engine rbe; + + std::string data; + data.resize(size); + std::generate(begin(data), end(data), std::ref(rbe)); /* std::ref: advance rbe itself, not a copy */ + + return data; +} +/* Copies the bytes of `s` into a new vector of the same length. */ +std::vector make_data(std::string s) { + std::vector rv(s.size()); + std::memcpy(rv.data(), s.data(), s.size()); + return rv; +} + +} // namespace +/* Fixture over gtest base `Base`: builds a manager holding only the incompressible categorizer. */ +template +class incompressible_categorizer_fixture : public Base { + protected: + void SetUp() override { lgr.clear(); } + /* Convenience overload: no extra command-line options. */ + void create_catmgr() { create_catmgr({}); } + /* Parses `args` against the registry options, then (re)creates catmgr from the result. */ + void create_catmgr(std::vector args) { + auto& catreg = categorizer_registry::instance(); + + po::options_description opts; + catreg.add_options(opts); + /* parse_command_line skips argv[0], so prepend a dummy program name */ + args.insert(args.begin(), "program"); + + po::variables_map vm; + auto parsed = po::parse_command_line(args.size(), args.data(), opts); + + po::store(parsed, vm); + po::notify(vm); + + catmgr = std::make_shared(lgr); + + catmgr->add(catreg.create(lgr, "incompressible", vm)); + } + + // void TearDown() override { + // } + /* categorize(): runs one job for `path`, feeding `data` to the random-access and sequential passes. */ + public: + auto categorize(fs::path const& path, std::span data) { + auto job = catmgr->job(path); + job.set_total_size(data.size()); + job.categorize_random_access(data); + job.categorize_sequential(data); + return job.result(); + } + /* catmgr is only assigned by create_catmgr(); call that before categorize(). */ + std::shared_ptr catmgr; + test::test_logger lgr{logger::INFO}; +}; +/* Non-parameterized instantiation used by the TEST_F cases below. */ +using incompressible_categorizer = + incompressible_categorizer_fixture<::testing::Test>; +/* Unknown requirement keys must throw std::runtime_error; an empty object is accepted. */ +TEST_F(incompressible_categorizer, requirements) { + create_catmgr(); + try { + catmgr->set_metadata_requirements(
+ catmgr->category_value("incompressible").value(), + R"({"foo": ["set", ["bar"]]})"); + FAIL() << "expected std::runtime_error"; + } catch (std::runtime_error const& e) { + EXPECT_STREQ("unsupported metadata requirements: foo", e.what()); + } catch (...) { + FAIL() << "unexpected exception: " + << folly::exceptionStr(std::current_exception()); + } + /* an empty requirements object is expected not to throw */ + catmgr->set_metadata_requirements( + catmgr->category_value("incompressible").value(), R"({})"); +} +/* 10,000 random bytes are expected to form one incompressible fragment. */ +TEST_F(incompressible_categorizer, categorize_incompressible) { + create_catmgr(); + + auto data = make_data(random_string(10'000)); + auto frag = categorize("random.txt", data); + ASSERT_EQ(1, frag.size()); + EXPECT_EQ("incompressible", + catmgr->category_name(frag.get_single_category().value())); +} +/* Lorem ipsum text is expected to produce no fragments at all. */ +TEST_F(incompressible_categorizer, categorize_default) { + create_catmgr(); + + auto data = make_data(loremipsum(10'000)); + auto frag = categorize("ipsum.txt", data); + EXPECT_TRUE(frag.empty()); +} +/* With 8k blocks and fragments enabled, mixed input is expected to split into per-block fragments. */ +TEST_F(incompressible_categorizer, categorize_fragments) { + create_catmgr( + {"--incompressible-block-size=8k", "--incompressible-fragments"}); + + // data: CCCCCCCCCCCCIIIIIIIIIIIICCCCCCCCCCCCIIIIIIIIIIIICCC + // block: 0-------1-------2-------3-------4-------5-------6-- + // frag: def-------------incomp--def-------------incomp--def + auto data = make_data(loremipsum(12 * 1024) + random_string(12 * 1024) + + loremipsum(12 * 1024) + random_string(12 * 1024) + + loremipsum(3 * 1024)); + + auto frag = categorize("mixed.txt", data); + ASSERT_EQ(5, frag.size()); + /* Expected (category name, length); blocks mixing text and random bytes count toward the text fragments, hence 16384 and 8192 rather than 12288. */ + std::vector> ref{ + {"", 16384}, {"incompressible", 8192}, {"", 16384}, + {"incompressible", 8192}, {"", 3072}, + }; + + for (size_t i = 0; i < ref.size(); ++i) { + auto const& r = ref[i]; + auto const& f = frag.span()[i]; + + EXPECT_EQ(r.first, catmgr->category_name(f.category().value())) << i; + EXPECT_EQ(r.second, f.length()) << i; + } +} +/* Input below --incompressible-min-input-size is expected to stay uncategorized. */ +TEST_F(incompressible_categorizer, min_input_size) { + create_catmgr({"--incompressible-min-input-size=1000"}); + + {
+ auto data = make_data(random_string(999)); + auto frag = categorize("random.txt", data); + EXPECT_TRUE(frag.empty()); + } + { + auto data = make_data(random_string(10'000)); + auto frag = categorize("random.txt", data); + ASSERT_EQ(1, frag.size()); + EXPECT_EQ("incompressible", + catmgr->category_name(frag.get_single_category().value())); + } +} +/* Parameter: (--incompressible-ratio value, whether the text should be flagged incompressible). */ +using max_ratio_test = incompressible_categorizer_fixture< + ::testing::TestWithParam>>; +/* Categorizes the same lorem ipsum input under each configured ratio. */ +TEST_P(max_ratio_test, max_ratio) { + auto [ratio, is_incompressible] = GetParam(); + auto arg = fmt::format("--incompressible-ratio={:f}", ratio); + /* arg outlives the call, keeping the c_str() pointer valid while it is parsed */ + create_catmgr({arg.c_str()}); + + auto data = make_data(loremipsum(10'000)); + auto frag = categorize("ipsum.txt", data); + if (is_incompressible) { + ASSERT_EQ(1, frag.size()); + EXPECT_EQ("incompressible", + catmgr->category_name(frag.get_single_category().value())); + } else { + EXPECT_TRUE(frag.empty()); + } +} +/* The text is expected to count as incompressible at ratio 0.4 but not at 0.6. */ +INSTANTIATE_TEST_SUITE_P(incompressible_categorizer, max_ratio_test, + ::testing::Values(std::make_pair(0.4, true), + std::make_pair(0.6, false))); +/* Parameter: (--incompressible-lz4-acceleration value, whether the text should be flagged incompressible). */ +using lz4_accel_test = incompressible_categorizer_fixture< + ::testing::TestWithParam>>; +/* Categorizes the same lorem ipsum input under each configured acceleration. */ +TEST_P(lz4_accel_test, lz4_acceleration) { + auto [accel, is_incompressible] = GetParam(); + auto arg = fmt::format("--incompressible-lz4-acceleration={}", accel); + /* arg outlives the call, keeping the c_str() pointer valid while it is parsed */ + create_catmgr({arg.c_str()}); + + auto data = make_data(loremipsum(10'000)); + auto frag = categorize("ipsum.txt", data); + if (is_incompressible) { + ASSERT_EQ(1, frag.size()); + EXPECT_EQ("incompressible", + catmgr->category_name(frag.get_single_category().value())); + } else { + EXPECT_TRUE(frag.empty()); + } +} +/* Of the three values, only acceleration 100 is expected to flag the text as incompressible. */ +INSTANTIATE_TEST_SUITE_P(incompressible_categorizer, lz4_accel_test, + ::testing::Values(std::make_pair(1, false), + std::make_pair(10, false), + std::make_pair(100, true)));