diff --git a/cmake/libdwarfs.cmake b/cmake/libdwarfs.cmake
index 9e6e7704..fc2a2913 100644
--- a/cmake/libdwarfs.cmake
+++ b/cmake/libdwarfs.cmake
@@ -146,6 +146,7 @@ add_library(
# src/writer/categorizer/binary_categorizer.cpp
src/writer/categorizer/fits_categorizer.cpp
+ src/writer/categorizer/hotness_categorizer.cpp
src/writer/categorizer/incompressible_categorizer.cpp
src/writer/categorizer/pcmaudio_categorizer.cpp
diff --git a/doc/mkdwarfs.md b/doc/mkdwarfs.md
index 2e5665ff..57244cdf 100644
--- a/doc/mkdwarfs.md
+++ b/doc/mkdwarfs.md
@@ -401,6 +401,11 @@ Most other options are concerned with compression tuning:
you can switch to `ascii`, which is like `unicode`, but looks less
fancy.
+- `--hotness-list=`*file*:
+  A file listing the paths of all "hot" files for the `hotness`
+  categorizer, one path per line. Paths are interpreted relative to
+  the `--input` path; a leading `/` is allowed and is ignored.
+
- `--incompressible-min-input-size=`*value*:
The minimum size of a file to be checked for incompressibility when
the `incompressible` categorizer is active.
diff --git a/include/dwarfs/writer/categorizer.h b/include/dwarfs/writer/categorizer.h
index c86512a7..a3927ea6 100644
--- a/include/dwarfs/writer/categorizer.h
+++ b/include/dwarfs/writer/categorizer.h
@@ -253,6 +253,7 @@ namespace detail {
void binary_categorizer_factory_registrar(categorizer_registry&);
void fits_categorizer_factory_registrar(categorizer_registry&);
+void hotness_categorizer_factory_registrar(categorizer_registry&);
void incompressible_categorizer_factory_registrar(categorizer_registry&);
void libmagic_categorizer_factory_registrar(categorizer_registry&);
void pcmaudio_categorizer_factory_registrar(categorizer_registry&);
diff --git a/src/writer/categorizer.cpp b/src/writer/categorizer.cpp
index 2dc65f6b..8352bb85 100644
--- a/src/writer/categorizer.cpp
+++ b/src/writer/categorizer.cpp
@@ -429,6 +429,7 @@ categorizer_registry::categorizer_registry() {
// binary_categorizer_factory_registrar(*this);
fits_categorizer_factory_registrar(*this);
+ hotness_categorizer_factory_registrar(*this);
incompressible_categorizer_factory_registrar(*this);
// libmagic_categorizer_factory_registrar(*this);
pcmaudio_categorizer_factory_registrar(*this);
diff --git a/src/writer/categorizer/hotness_categorizer.cpp b/src/writer/categorizer/hotness_categorizer.cpp
new file mode 100644
index 00000000..d0817773
--- /dev/null
+++ b/src/writer/categorizer/hotness_categorizer.cpp
@@ -0,0 +1,180 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace dwarfs::writer {
+
+namespace po = boost::program_options;
+
+namespace {
+
+// Name of the only category this categorizer reports and assigns.
+constexpr std::string_view const HOTNESS_CATEGORY{"hotness"};
+
+// Settings for the hotness categorizer.
+struct hotness_categorizer_config {
+ // Path of the file that lists the "hot" paths; filled from the
+ // `hotness-list` command line option. Empty if no list was given.
+ std::string hotness_list;
+};
+
+/**
+ * Categorizer that tags files whose relative path appears in a
+ * user-supplied "hotness list".
+ *
+ * The list file is read once in the constructor; `categorize()` then
+ * only looks up the relative path of the file it is given.
+ */
+template
+class hotness_categorizer_ final : public random_access_categorizer {
+ public:
+ // Loads the list named by `cfg.hotness_list`, if that path is not empty.
+ hotness_categorizer_(logger& lgr, hotness_categorizer_config const& cfg);
+
+ // Returns the category names this categorizer can produce.
+ std::span categories() const override;
+
+ // Returns one "hotness" fragment of length `data.size()` if `path` is in
+ // the hotness list, and no fragments otherwise.
+ inode_fragments
+ categorize(file_path_info const& path, std::span data,
+ category_mapper const& mapper) const override;
+
+ // Orders two fragment categories by their subcategory value.
+ bool
+ subcategory_less(fragment_category a, fragment_category b) const override;
+
+ private:
+ LOG_PROXY_DECL(LoggerPolicy);
+ // Relative paths (as strings) of all files listed as "hot".
+ std::unordered_set hotness_set_;
+ // Set once `categorize()` has run with an empty set, so that the
+ // "no hotness list provided" warning is not repeated on later calls.
+ std::atomic mutable warned_no_list_{false};
+ // Copy of the configuration this instance was created with.
+ hotness_categorizer_config const cfg_;
+};
+
+/**
+ * Builds the categorizer and loads the hotness list, if one is configured.
+ *
+ * Every line of the list file is treated as one path. The root part of a
+ * path (e.g. a leading `/`) is dropped, so all entries are stored as
+ * relative paths. Lines that yield an empty relative path (blank lines or
+ * a lone `/`) are skipped: such an entry carries no information, and two
+ * of them would otherwise be reported as a "duplicate path".
+ *
+ * \param lgr  Logger used for debug and warning output.
+ * \param cfg  Configuration; `cfg.hotness_list` is the path of the list
+ *             file and may be empty, in which case nothing is loaded.
+ *
+ * Raises a `runtime_error` through `DWARFS_THROW` if the file cannot be
+ * opened, if reading it fails, or if the same path is listed twice.
+ */
+template
+hotness_categorizer_::hotness_categorizer_(
+    logger& lgr, hotness_categorizer_config const& cfg)
+    : LOG_PROXY_INIT(lgr)
+    , cfg_{cfg} {
+  auto const& file = cfg_.hotness_list;
+
+  if (file.empty()) {
+    // No list configured; categorize() reports this on first use.
+    return;
+  }
+
+  std::ifstream ifs{file};
+  if (!ifs) {
+    DWARFS_THROW(runtime_error,
+                 fmt::format("failed to open file '{}'", file));
+  }
+
+  std::string line;
+  while (std::getline(ifs, line)) {
+    auto const path = std::filesystem::path{line}.relative_path();
+
+    if (path.empty()) {
+      // Blank line or bare root: nothing to match against, ignore it.
+      continue;
+    }
+
+    LOG_DEBUG << "hotness categorizer: adding path '" << path << "'";
+
+    if (!hotness_set_.emplace(path.string()).second) {
+      DWARFS_THROW(runtime_error,
+                   fmt::format("duplicate path in hotness list: '{}'", line));
+    }
+  }
+
+  // getline() stops on both end-of-file and I/O errors; only the latter
+  // sets badbit. Do not silently continue with a truncated list.
+  if (ifs.bad()) {
+    DWARFS_THROW(runtime_error,
+                 fmt::format("failed to read file '{}'", file));
+  }
+
+  if (hotness_set_.empty()) {
+    LOG_WARN << "hotness categorizer: empty hotness list";
+  }
+}
+
+// Returns the category names provided by this categorizer, which is just
+// HOTNESS_CATEGORY.
+template
+std::span
+hotness_categorizer_::categories() const {
+ // Function-local static, so the returned span refers to storage that
+ // outlives this call.
+ static constexpr std::array const s_categories{
+ HOTNESS_CATEGORY,
+ };
+ return s_categories;
+}
+
+/**
+ * Categorizes a file as "hot" if its relative path is in the hotness list.
+ *
+ * \param path    Path information of the file; only its relative path is
+ *                used.
+ * \param data    File contents; only the size is used.
+ * \param mapper  Translates the category name into the value stored in the
+ *                fragment.
+ *
+ * \return One fragment of category HOTNESS_CATEGORY with length
+ *         `data.size()` if the path is listed; otherwise a fragment list
+ *         to which nothing was added.
+ */
+template
+inode_fragments hotness_categorizer_::categorize(
+    file_path_info const& path, std::span data,
+    category_mapper const& mapper) const {
+  inode_fragments fragments;
+
+  if (hotness_set_.empty()) {
+    // The flag is atomic, which suggests this method may run on several
+    // threads at once. A separate test-then-set would let more than one
+    // thread pass the test and emit the warning; exchange() guarantees
+    // that exactly one caller observes `false`. The plain load() first
+    // keeps the common "already warned" path free of writes.
+    if (!warned_no_list_.load() && !warned_no_list_.exchange(true) &&
+        cfg_.hotness_list.empty()) {
+      LOG_WARN << "hotness categorizer: no hotness list provided";
+    }
+    return fragments;
+  }
+
+  auto const rel_path = path.relative_path();
+
+  LOG_DEBUG << "hotness categorizer: checking path '" << rel_path << "'";
+
+  if (hotness_set_.contains(rel_path.string())) {
+    fragments.emplace_back(fragment_category(mapper(HOTNESS_CATEGORY)),
+                           data.size());
+  }
+
+  return fragments;
+}
+
+// Strict ordering of two fragment categories by their subcategory value
+// alone; the category value itself is not compared.
+template
+bool hotness_categorizer_::subcategory_less(
+ fragment_category a, fragment_category b) const {
+ return a.subcategory() < b.subcategory();
+}
+
+// Factory that exposes the categorizer under the name "hotness" and
+// declares its command line option.
+class hotness_categorizer_factory : public categorizer_factory {
+ public:
+ // Sets up the option description and registers the `hotness-list`
+ // option, binding its value to `cfg_.hotness_list`.
+ hotness_categorizer_factory()
+ : opts_{std::make_shared(
+ "Hotness categorizer options")} {
+ // clang-format off
+ opts_->add_options()
+ ("hotness-list",
+ po::value(&cfg_.hotness_list)
+ ->value_name("file"),
+ "file with list of hot file paths")
+ ;
+ // clang-format on
+ }
+
+ // Name under which this categorizer is selected.
+ std::string_view name() const override { return "hotness"; }
+
+ // Returns the option description built in the constructor.
+ std::shared_ptr options() const override {
+ return opts_;
+ }
+
+ // Creates a categorizer from the current contents of `cfg_`. The
+ // variables map is unused because the option is bound directly to
+ // `cfg_.hotness_list` in the constructor.
+ std::unique_ptr
+ create(logger& lgr, po::variables_map const& /*vm*/) const override {
+ return make_unique_logging_object(lgr, cfg_);
+ }
+
+ private:
+ // Target of the bound `hotness-list` option; handed to every
+ // categorizer created by this factory.
+ hotness_categorizer_config cfg_;
+ // Option description returned by `options()`.
+ std::shared_ptr opts_;
+};
+
+} // namespace
+
+// Makes the factory known to the categorizer registry.
+// NOTE(review): presumably this expands to the definition of
+// hotness_categorizer_factory_registrar() — confirm against the macro.
+REGISTER_CATEGORIZER_FACTORY(hotness_categorizer_factory)
+
+} // namespace dwarfs::writer