Add Python scripting support

This commit is contained in:
Marcus Holland-Moritz 2020-12-05 22:47:53 +01:00
parent cf464d2cb1
commit b4afb2fcfd
6 changed files with 677 additions and 12 deletions

View File

@ -21,6 +21,7 @@ project(dwarfs)
cmake_minimum_required(VERSION 3.13.4)
option(WITH_TESTS "build with tests" OFF)
option(WITH_PYTHON "build with Python scripting support" OFF)
set(default_build_type "Release")
@ -38,10 +39,25 @@ set(DWARFS_VERSION
"${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
)
find_package(PkgConfig REQUIRED)
list(APPEND DWARFS_BOOST_MODULES date_time filesystem program_options system)
find_package(Boost 1.67 REQUIRED COMPONENTS date_time filesystem
program_options system)
if(WITH_PYTHON)
# TODO: would be nicer to be able to support a range of python versions
find_package(Python3 ${WITH_PYTHON_VERSION} EXACT REQUIRED COMPONENTS Development)
list(APPEND DWARFS_BOOST_MODULES
"python${Python3_VERSION_MAJOR}${Python3_VERSION_MINOR}")
message(STATUS "Enabling support for Python ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}")
endif()
find_package(Boost 1.67 REQUIRED COMPONENTS ${DWARFS_BOOST_MODULES})
if(WITH_PYTHON)
set(BOOST_PYTHON_LIBS ${Boost_LIBRARIES})
list(FILTER Boost_LIBRARIES EXCLUDE REGEX python)
list(FILTER BOOST_PYTHON_LIBS INCLUDE REGEX python)
endif()
find_package(PkgConfig REQUIRED)
pkg_check_modules(FUSE3 REQUIRED IMPORTED_TARGET fuse3>=3.4.1)
pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3)
@ -117,6 +133,9 @@ list(
src/dwarfs/util.cpp
src/dwarfs/worker_group.cpp)
if(WITH_PYTHON)
list(APPEND LIBDWARFS_SRC src/dwarfs/python_script.cpp)
endif()
add_library(dwarfs ${LIBDWARFS_SRC})
@ -251,7 +270,7 @@ target_include_directories(metadata_thrift PRIVATE ${INCLUDE_DIRS})
add_dependencies(metadata_thrift thrift_light)
foreach(tgt dwarfs ${BINARY_TARGETS})
target_include_directories(${tgt} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}
target_include_directories(${tgt} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}
${INCLUDE_DIRS})
target_include_directories(${tgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
@ -261,7 +280,8 @@ foreach(tgt dwarfs ${BINARY_TARGETS})
PRIVATE DWARFS_VERSION=\"${DWARFS_VERSION}\"
$<$<BOOL:${LIBLZ4_FOUND}>:DWARFS_HAVE_LIBLZ4>
$<$<BOOL:${LIBLZMA_FOUND}>:DWARFS_HAVE_LIBLZMA>
$<$<BOOL:${LIBZSTD_FOUND}>:DWARFS_HAVE_LIBZSTD>)
$<$<BOOL:${LIBZSTD_FOUND}>:DWARFS_HAVE_LIBZSTD>
$<$<BOOL:${WITH_PYTHON}>:DWARFS_HAVE_PYTHON>)
target_compile_options(${tgt} PRIVATE -Wall -Wextra -pedantic)
@ -287,6 +307,9 @@ foreach(tgt ${BINARY_TARGETS})
PkgConfig::LIBLZMA
PkgConfig::LIBZSTD)
if(WITH_PYTHON)
target_link_libraries(${tgt} ${BOOST_PYTHON_LIBS} ${Python3_LIBRARIES})
endif()
endforeach()
target_link_libraries(dwarfs-bin PkgConfig::FUSE3)

View File

@ -0,0 +1,53 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <memory>
#include "dwarfs/inode.h"
#include "dwarfs/script.h"
namespace dwarfs {
class logger;
class python_script : public script {
public:
python_script(logger& lgr, const std::string& code, const std::string& ctor);
~python_script();
bool has_configure() const override;
bool has_filter() const override;
bool has_transform() const override;
bool has_order() const override;
void configure(options_interface const& oi) override;
bool filter(entry_interface const& ei) override;
void transform(entry_interface& ei) override;
void order(inode_vector& iv) override;
private:
class impl;
std::unique_ptr<impl> impl_;
};
} // namespace dwarfs

30
scripts/example.py Normal file
View File

@ -0,0 +1,30 @@
class mkdwarfs(object):
def __init__(self):
logger.info("this is python!")
def configure(self, config):
config.enable_similarity()
config.set_order(file_order_mode.script, set_mode.override)
config.set_remove_empty_dirs(True, set_mode.default)
def filter(self, entry):
logger.debug(f"filter: {entry.path()} [{entry.type()}]")
if entry.type() == 'directory' and entry.name() == 'dev':
return False
return True
def transform(self, entry):
logger.debug(f"transform {entry.path()}")
entry.set_permissions(entry.permissions() & 0o7555)
return entry
def order(self, inodes):
logger.info("order")
for i in inodes:
logger.debug(f"inode: {i.similarity_hash()} {i.size()} {i.refcount()}")
for p in i.paths():
logger.debug(f" file: {p}")
return reversed(inodes)
def _something_private(self):
pass

View File

@ -0,0 +1,453 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <chrono>
#include <unordered_map>
#include <unordered_set>
#include <boost/algorithm/string.hpp>
#include <boost/python.hpp>
#include <sparsehash/dense_hash_map>
#include <fmt/format.h>
#include "dwarfs/entry.h"
#include "dwarfs/inode.h"
#include "dwarfs/logger.h"
#include "dwarfs/options_interface.h"
#include "dwarfs/python_script.h"
namespace dwarfs {
namespace py = boost::python;
namespace {
std::unordered_set<std::string> supported_methods{"configure", "filter",
"transform", "order"};
void init_python() {
static bool initialized = false;
if (!initialized) {
Py_Initialize();
initialized = true;
}
}
bool callable(py::object object) { return 1 == PyCallable_Check(object.ptr()); }
bool hasattr(py::object obj, const char* name) {
return PyObject_HasAttrString(obj.ptr(), name);
}
bool has_callable(py::object obj, char const* method) {
return hasattr(obj, method) && callable(obj.attr(method));
}
class py_logger {
public:
py_logger(logger& lgr)
: log_(lgr) {}
void error(std::string msg) { log_.error() << "[script] " << msg; }
void warn(std::string msg) { log_.warn() << "[script] " << msg; }
void info(std::string msg) { log_.info() << "[script] " << msg; }
void debug(std::string msg) { log_.debug() << "[script] " << msg; }
void trace(std::string msg) { log_.trace() << "[script] " << msg; }
private:
using log_proxy_t = log_proxy<debug_logger_policy>;
log_proxy_t log_;
};
template <typename T>
class basic_entry_wrapper {
public:
basic_entry_wrapper(T& entry)
: entry_(&entry) {}
size_t size() const { return entry_->size(); }
std::string path() const { return entry_->path(); }
std::string name() const { return entry_->name(); }
std::string type() const { return entry_->type_string(); }
uint16_t permissions() const { return entry_->get_permissions(); }
void set_permissions(uint16_t perm) { entry_->set_permissions(perm); }
uint16_t uid() const { return entry_->get_uid(); }
void set_uid(uint16_t uid) { entry_->set_uid(uid); }
uint16_t gid() const { return entry_->get_gid(); }
void set_gid(uint16_t gid) { entry_->set_gid(gid); }
uint64_t atime() const { return entry_->get_atime(); }
void set_atime(uint64_t atime) { entry_->set_atime(atime); }
uint64_t mtime() const { return entry_->get_mtime(); }
void set_mtime(uint64_t mtime) { entry_->set_mtime(mtime); }
uint64_t ctime() const { return entry_->get_ctime(); }
void set_ctime(uint64_t ctime) { entry_->set_ctime(ctime); }
private:
T* entry_;
};
using entry_wrapper = basic_entry_wrapper<entry_interface const>;
using mutable_entry_wrapper = basic_entry_wrapper<entry_interface>;
class inode_wrapper {
public:
inode_wrapper(inode const* ino)
: ino_(ino) {}
size_t similarity_hash() const { return ino_->similarity_hash(); }
size_t refcount() const { return ino_->files().size(); }
py::list paths() const {
py::list ps;
auto& fs = ino_->files();
for (auto& f : fs) {
ps.append(f->path());
}
return ps;
}
size_t size() const { return ino_->any()->size(); }
inode const* get() const { return ino_; }
private:
inode const* ino_;
};
} // namespace
class python_script::impl {
public:
impl(logger& lgr, const std::string& code, const std::string& ctor);
~impl();
void configure(options_interface const& oi);
bool filter(entry_interface const& ei);
void transform(entry_interface& ei);
void order(inode_vector& iv);
bool has_configure() const { return has_configure_; }
bool has_filter() const { return has_filter_; }
bool has_transform() const { return has_transform_; }
bool has_order() const { return has_order_; }
private:
void check_instance_methods(py::object obj) const;
void log_py_error() const;
using log_proxy_t = log_proxy<debug_logger_policy>;
using clock = std::chrono::steady_clock;
class timer {
public:
timer(clock::duration& d)
: start_(clock::now())
, d_(d) {}
~timer() { d_ += clock::now() - start_; }
private:
clock::time_point start_;
clock::duration& d_;
};
log_proxy_t log_;
py_logger pylog_;
bool has_configure_{false};
bool has_filter_{false};
bool has_transform_{false};
bool has_order_{false};
py::object instance_;
py::object main_module_;
py::object main_namespace_;
clock::duration configure_time_{};
clock::duration filter_time_{};
clock::duration transform_time_{};
clock::duration order_time_{};
};
python_script::impl::impl(logger& lgr, const std::string& code,
const std::string& ctor)
: log_(lgr)
, pylog_(lgr) {
try {
init_python();
main_module_ = py::import("__main__");
main_namespace_ = main_module_.attr("__dict__");
py::scope scope(main_module_);
main_namespace_["dwarfs_logger"] =
py::class_<py_logger, boost::noncopyable>("dwarfs_logger", py::no_init)
.def("error", &py_logger::error)
.def("warn", &py_logger::warn)
.def("info", &py_logger::info)
.def("debug", &py_logger::debug)
.def("trace", &py_logger::trace);
main_namespace_["file_order_mode"] =
py::enum_<file_order_mode>("file_order_mode")
.value("none", file_order_mode::NONE)
.value("path", file_order_mode::PATH)
.value("script", file_order_mode::SCRIPT)
.value("similarity", file_order_mode::SIMILARITY);
main_namespace_["set_mode"] =
py::enum_<options_interface::set_mode>("set_mode")
.value("default", options_interface::DEFAULT)
.value("override", options_interface::OVERRIDE);
main_namespace_["dwarfs_options"] =
py::class_<options_interface, boost::noncopyable>("dwarfs_options",
py::no_init)
.def("enable_similarity", &options_interface::enable_similarity)
.def("set_order", &options_interface::set_order)
.def("set_remove_empty_dirs",
&options_interface::set_remove_empty_dirs);
main_namespace_["inode_wrapper"] =
py::class_<inode_wrapper, std::shared_ptr<inode_wrapper>>(
"inode_wrapper", py::no_init)
.def("similarity_hash", &inode_wrapper::similarity_hash)
.def("refcount", &inode_wrapper::refcount)
.def("paths", &inode_wrapper::paths)
.def("size", &inode_wrapper::size);
main_namespace_["entry_wrapper"] =
py::class_<entry_wrapper, std::shared_ptr<entry_wrapper>>(
"entry_wrapper", py::no_init)
.def("name", &entry_wrapper::name)
.def("type", &entry_wrapper::type)
.def("path", &entry_wrapper::path)
.def("size", &entry_wrapper::size)
.def("permissions", &entry_wrapper::permissions)
.def("uid", &entry_wrapper::uid)
.def("gid", &entry_wrapper::gid)
.def("atime", &entry_wrapper::atime)
.def("mtime", &entry_wrapper::mtime)
.def("ctime", &entry_wrapper::ctime);
main_namespace_["mutable_entry_wrapper"] =
py::class_<mutable_entry_wrapper,
std::shared_ptr<mutable_entry_wrapper>>(
"mutable_entry_wrapper", py::no_init)
.def("name", &mutable_entry_wrapper::name)
.def("type", &mutable_entry_wrapper::type)
.def("path", &mutable_entry_wrapper::path)
.def("size", &mutable_entry_wrapper::size)
.def("permissions", &mutable_entry_wrapper::permissions)
.def("uid", &mutable_entry_wrapper::uid)
.def("gid", &mutable_entry_wrapper::gid)
.def("atime", &mutable_entry_wrapper::atime)
.def("mtime", &mutable_entry_wrapper::mtime)
.def("ctime", &mutable_entry_wrapper::ctime)
.def("set_permissions", &mutable_entry_wrapper::set_permissions)
.def("set_uid", &mutable_entry_wrapper::set_uid)
.def("set_gid", &mutable_entry_wrapper::set_gid)
.def("set_atime", &mutable_entry_wrapper::set_atime)
.def("set_mtime", &mutable_entry_wrapper::set_mtime)
.def("set_ctime", &mutable_entry_wrapper::set_ctime);
main_namespace_["logger"] = py::ptr(&pylog_);
py::exec(code.c_str(), main_namespace_);
instance_ = py::eval(ctor.c_str(), main_namespace_);
check_instance_methods(instance_);
has_configure_ = has_callable(instance_, "configure");
has_filter_ = has_callable(instance_, "filter");
has_transform_ = has_callable(instance_, "transform");
has_order_ = has_callable(instance_, "order");
} catch (py::error_already_set) {
log_py_error();
throw std::runtime_error("error initializing script");
}
}
void python_script::impl::log_py_error() const {
PyObject *exc, *val, *tb;
PyErr_Fetch(&exc, &val, &tb);
PyErr_NormalizeException(&exc, &val, &tb);
py::handle<> hexc(exc), hval(py::allow_null(val)), htb(py::allow_null(tb));
if (!hval) {
log_.error() << std::string(py::extract<std::string>(py::str(hexc)));
} else {
py::object traceback(py::import("traceback"));
py::object format_exception(traceback.attr("format_exception"));
py::list formatted_list(format_exception(hexc, hval, htb));
for (int count = 0; count < len(formatted_list); ++count) {
log_.error() << std::string(
py::extract<std::string>(formatted_list[count].slice(0, -1)));
}
}
}
void python_script::impl::check_instance_methods(py::object obj) const {
for (py::stl_input_iterator<py::str>
it(py::object(py::handle<>(PyObject_Dir(obj.ptr())))),
end;
it != end; ++it) {
if (!it->startswith("_") && callable(obj.attr(*it))) {
std::string name{py::extract<char const*>(*it)};
if (supported_methods.find(name) == supported_methods.end()) {
log_.warn() << "unknown method '" << name << "' found in Python class";
}
}
}
}
python_script::impl::~impl() {
std::vector<std::string> timings;
auto add_timing = [&](bool flag, std::string_view name, auto const& d) {
using floatsec = std::chrono::duration<float>;
if (flag) {
timings.push_back(
fmt::format("{0} {1:.2f}s", name,
std::chrono::duration_cast<floatsec>(d).count()));
}
};
add_timing(has_configure_, "configure", configure_time_);
add_timing(has_filter_, "filter", filter_time_);
add_timing(has_transform_, "transform", transform_time_);
add_timing(has_order_, "order", order_time_);
log_.info() << "script time: " << boost::join(timings, ", ");
// nothing else, really, as boost::python docs forbid using Py_Finalize
}
void python_script::impl::configure(options_interface const& oi) {
timer tmr(configure_time_);
try {
instance_.attr("configure")(py::ptr(&oi));
} catch (py::error_already_set) {
log_py_error();
throw std::runtime_error("error in configure");
}
}
bool python_script::impl::filter(entry_interface const& ei) {
timer tmr(filter_time_);
try {
return py::extract<bool>(
instance_.attr("filter")(std::make_shared<entry_wrapper>(ei)));
} catch (py::error_already_set) {
log_py_error();
throw std::runtime_error("error filtering entry");
}
}
void python_script::impl::transform(entry_interface& ei) {
timer tmr(transform_time_);
try {
instance_.attr("transform")(std::make_shared<mutable_entry_wrapper>(ei));
} catch (py::error_already_set) {
log_py_error();
throw std::runtime_error("error transforming entry");
}
}
void python_script::impl::order(inode_vector& iv) {
timer tmr(order_time_);
try {
py::list files;
{
auto td = log_.timed_debug();
for (size_t i = 0; i < iv.size(); ++i) {
files.append(std::make_shared<inode_wrapper>(iv[i].get()));
}
td << "prepared files for ordering";
}
py::object ordered;
{
auto td = log_.timed_debug();
ordered = instance_.attr("order")(files);
td << "ordered files in script code";
}
google::dense_hash_map<inode const*, size_t> priority(iv.size());
priority.set_empty_key(nullptr);
auto td = log_.timed_debug();
size_t index = 0;
for (py::stl_input_iterator<py::object> it(ordered), end; it != end; ++it) {
auto wrapper{py::extract<std::shared_ptr<inode_wrapper>>(*it)()};
priority[wrapper->get()] = index++;
}
if (index != iv.size()) {
throw std::runtime_error("order() returned different number of files");
}
std::sort(iv.begin(), iv.end(),
[&](inode_ptr const& a, inode_ptr const& b) {
auto ia = priority.find(a.get());
auto ib = priority.find(b.get());
if (ia == priority.end() || ib == priority.end()) {
throw std::runtime_error(
"invalid inode pointer while ordering files");
}
return ia->second < ib->second;
});
td << "applied new inode order";
} catch (py::error_already_set) {
log_py_error();
throw std::runtime_error("error ordering inodes");
}
}
python_script::python_script(logger& lgr, const std::string& code,
const std::string& ctor)
: impl_(std::make_unique<impl>(lgr, code, ctor)) {}
python_script::~python_script() = default;
bool python_script::has_configure() const { return impl_->has_configure(); }
bool python_script::has_filter() const { return impl_->has_filter(); }
bool python_script::has_transform() const { return impl_->has_transform(); }
bool python_script::has_order() const { return impl_->has_order(); }
void python_script::configure(options_interface const& oi) {
impl_->configure(oi);
}
bool python_script::filter(entry_interface const& ei) {
return impl_->filter(ei);
}
void python_script::transform(entry_interface& ei) { impl_->transform(ei); }
void python_script::order(inode_vector& iv) { impl_->order(iv); }
} // namespace dwarfs

View File

@ -341,9 +341,15 @@ scanner_<LoggerPolicy>::scan_tree(const std::string& path, progress& prog) {
try {
auto pe = entry_->create(*os_, name, parent);
if (script_ && !script_->filter(*pe)) {
log_.debug() << "skipping " << name;
continue;
if (script_) {
if (script_->has_filter() && !script_->filter(*pe)) {
log_.debug() << "skipping " << name;
continue;
}
if (script_->has_transform()) {
script_->transform(*pe);
}
}
if (pe) {
@ -418,10 +424,16 @@ void scanner_<LoggerPolicy>::order_files(inode_manager& im) {
break;
}
case file_order_mode::SCRIPT:
case file_order_mode::SCRIPT: {
if (!script_->has_order()) {
throw std::runtime_error("script cannot order inodes");
}
log_.info() << "ordering " << im.count() << " inodes using script...";
auto ti = log_.timed_info();
im.order_inodes(script_);
ti << im.count() << " inodes ordered";
break;
}
case file_order_mode::SIMILARITY: {
log_.info() << "ordering " << im.count() << " inodes by similarity...";

View File

@ -45,6 +45,7 @@
#include <boost/program_options.hpp>
#include <folly/Conv.h>
#include <folly/FileUtil.h>
#include <folly/gen/String.h>
#include <fmt/format.h>
@ -62,12 +63,16 @@
#include "dwarfs/logger.h"
#include "dwarfs/mmap.h"
#include "dwarfs/options.h"
#include "dwarfs/options_interface.h"
#include "dwarfs/os_access_posix.h"
#include "dwarfs/progress.h"
#include "dwarfs/scanner.h"
#include "dwarfs/script.h"
#include "dwarfs/util.h"
#ifdef DWARFS_HAVE_PYTHON
#include "dwarfs/python_script.h"
#endif
namespace po = boost::program_options;
@ -87,7 +92,11 @@ namespace {
const std::map<std::string, file_order_mode> order_choices{
{"none", file_order_mode::NONE},
{"path", file_order_mode::PATH},
#ifdef DWARFS_HAVE_PYTHON
{"script", file_order_mode::SCRIPT},
#endif
{"similarity", file_order_mode::SIMILARITY}};
} // namespace
namespace dwarfs {
@ -106,6 +115,57 @@ void validate(boost::any& v, const std::vector<std::string>& values,
v = boost::any(it->second);
}
class script_options : public options_interface {
public:
script_options(logger& lgr, po::variables_map& vm, scanner_options& opts,
bool& force_similarity)
: log_(lgr)
, vm_(vm)
, opts_(opts)
, force_similarity_(force_similarity) {}
void set_order(file_order_mode order_mode, set_mode mode = DEFAULT) override {
set(opts_.file_order, order_mode, "order", mode);
}
void
set_remove_empty_dirs(bool remove_empty, set_mode mode = DEFAULT) override {
set(opts_.remove_empty_dirs, remove_empty, "remove-empty-dirs", mode);
}
void enable_similarity() override {
log_.debug() << "script is forcing similarity hash computation";
force_similarity_ = true;
}
private:
template <typename T>
void set(T& target, T const& value, std::string const& name, set_mode mode) {
switch (mode) {
case options_interface::DEFAULT:
if (!vm_.count(name) || vm_[name].defaulted()) {
log_.info() << "script is setting " << name << "=" << value;
target = value;
}
break;
case options_interface::OVERRIDE:
if (vm_.count(name) && !vm_[name].defaulted()) {
log_.warn() << "script is overriding " << name << "=" << value;
} else {
log_.info() << "script is setting " << name << "=" << value;
}
target = value;
break;
}
}
log_proxy<debug_logger_policy> log_;
po::variables_map& vm_;
scanner_options& opts_;
bool& force_similarity_;
};
} // namespace dwarfs
namespace {
@ -280,6 +340,11 @@ int mkdwarfs(int argc, char** argv) {
po::value<file_order_mode>(&options.file_order)
->default_value(file_order_mode::SIMILARITY, "similarity"),
order_desc.c_str())
#ifdef DWARFS_HAVE_PYTHON
("script",
po::value<std::string>(&script_arg),
"Python script for customization")
#endif
("blockhash-window-sizes",
po::value<std::string>(&window_sizes),
"window sizes for block hashing")
@ -425,6 +490,34 @@ int mkdwarfs(int argc, char** argv) {
std::shared_ptr<script> script;
#ifdef DWARFS_HAVE_PYTHON
if (!script_arg.empty()) {
std::string file, ctor;
if (auto pos = script_arg.find(':'); pos != std::string::npos) {
file = script_arg.substr(0, pos);
ctor = script_arg.substr(pos + 1);
if (ctor.find('(') == std::string::npos) {
ctor += "()";
}
} else {
file = script_arg;
ctor = "mkdwarfs()";
}
std::string code;
if (folly::readFile(file.c_str(), code)) {
script = std::make_shared<python_script>(lgr, code, ctor);
} else {
throw std::runtime_error("could not load script: " + file);
}
}
#endif
bool force_similarity = false;
if (script && script->has_configure()) {
script_options script_opts(lgr, vm, options, force_similarity);
script->configure(script_opts);
}
if (options.file_order == file_order_mode::SCRIPT && !script) {
throw std::runtime_error(
@ -463,9 +556,10 @@ int mkdwarfs(int argc, char** argv) {
ti << "filesystem rewritten";
} else {
scanner s(lgr, wg_scanner, cfg,
entry_factory::create(options.file_order ==
file_order_mode::SIMILARITY),
std::make_shared<os_access_posix>(), script, options);
entry_factory::create(force_similarity ||
options.file_order ==
file_order_mode::SIMILARITY),
std::make_shared<os_access_posix>(), std::move(script), options);
{
auto ti = log.timed_info();