From b4cba6eb7401f4e136b629388e5ff3323928c2cb Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Thu, 3 Aug 2023 12:15:53 +0200 Subject: [PATCH] Remove Python scripting support --- .docker/Dockerfile | 1 - CMakeLists.txt | 36 +-- README.md | 4 - include/dwarfs/python_script.h | 53 ---- src/dwarfs/python_script.cpp | 453 --------------------------------- src/mkdwarfs_main.cpp | 41 --- 6 files changed, 2 insertions(+), 586 deletions(-) delete mode 100644 include/dwarfs/python_script.h delete mode 100644 src/dwarfs/python_script.cpp diff --git a/.docker/Dockerfile b/.docker/Dockerfile index d0ad4685..3f769749 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -32,7 +32,6 @@ RUN apt install -y \ libboost-filesystem-dev \ libboost-iostreams-dev \ libboost-program-options-dev \ - libboost-python-dev \ libboost-regex-dev \ libboost-system-dev \ libboost-thread-dev \ diff --git a/CMakeLists.txt b/CMakeLists.txt index e85448c2..7de42034 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,6 @@ if(WIN32) set(PREFER_SYSTEM_ZSTD ON) set(PREFER_SYSTEM_XXHASH ON) else() - option(WITH_PYTHON "build with Python scripting support" OFF) option(WITH_LEGACY_FUSE "build fuse2 driver even if we have fuse3" OFF) option(WITH_MAN_PAGES "build man pages using ronn" ON) option(ENABLE_ASAN "enable address sanitizer" OFF) @@ -121,9 +120,6 @@ if(STATIC_BUILD_DO_NOT_USE) set(CMAKE_PREFIX_PATH ${STATIC_BUILD_EXTRA_PREFIX}) endif() - if(WITH_PYTHON) - message(FATAL_ERROR "python is not supported in static builds") - endif() set(CMAKE_FIND_LIBRARY_SUFFIXES ".a" CACHE STRING "please look for static libs") @@ -188,27 +184,7 @@ else() FetchContent_MakeAvailable(fmt) endif() -list(APPEND DWARFS_BOOST_MODULES chrono iostreams program_options) - -if(WITH_PYTHON) - # TODO: would be nicer to be able to support a range of python versions - find_package(Python3 ${WITH_PYTHON_VERSION} EXACT REQUIRED - COMPONENTS Development) - list(APPEND DWARFS_BOOST_MODULES - "python${Python3_VERSION_MAJOR}${Python3_VERSION_MINOR}") - message( - STATUS - "Enabling support for Python ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}" - ) -endif() - -find_package(Boost 1.67 REQUIRED COMPONENTS ${DWARFS_BOOST_MODULES}) - -if(WITH_PYTHON) - set(BOOST_PYTHON_LIBS ${Boost_LIBRARIES}) - list(FILTER Boost_LIBRARIES EXCLUDE REGEX python) - list(FILTER BOOST_PYTHON_LIBS INCLUDE REGEX python) -endif() +find_package(Boost 1.67 REQUIRED COMPONENTS chrono iostreams program_options) if(STATIC_BUILD_DO_NOT_USE) set(CMAKE_FIND_LIBRARY_SUFFIXES .a) @@ -416,10 +392,6 @@ else() list(APPEND LIBDWARFS_SRC src/dwarfs/version.cpp) endif() -if(WITH_PYTHON) - list(APPEND LIBDWARFS_SRC src/dwarfs/python_script.cpp) -endif() - list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/null.cpp) list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/zstd.cpp) @@ -755,7 +727,7 @@ foreach(tgt dwarfs dwarfs_compression dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGE $<$:DWARFS_HAVE_LIBLZ4> $<$:DWARFS_HAVE_LIBLZMA> $<$,$>:DWARFS_HAVE_LIBBROTLI> - $<$:DWARFS_HAVE_PYTHON>) + ) if(DWARFS_USE_EXCEPTION_TRACER) target_compile_definitions(${tgt} PRIVATE DWARFS_USE_EXCEPTION_TRACER) @@ -859,10 +831,6 @@ else() target_link_libraries(dwarfs xxhash) endif() -if(WITH_PYTHON) - target_link_libraries(dwarfs ${BOOST_PYTHON_LIBS} ${Python3_LIBRARIES}) -endif() - foreach(tgt ${BINARY_TARGETS} ${MAIN_TARGETS}) target_link_libraries(${tgt} "$") target_link_libraries(${tgt} dwarfs dwarfs_tool) diff --git a/README.md b/README.md index 67819479..1dceece5 100644 --- a/README.md +++ b/README.md @@ -109,9 +109,6 @@ Distinct features of DwarFS are: [FUSE driver](doc/dwarfs.md) are able to make good use of the many cores of your system. -- Optional experimental Python scripting support to provide custom - filtering and ordering functionality. - ## History I started working on DwarFS in 2013 and my main use case and major @@ -257,7 +254,6 @@ $ apt install \ libboost-filesystem-dev \ libboost-iostreams-dev \ libboost-program-options-dev \ - libboost-python-dev \ libboost-regex-dev \ libboost-system-dev \ libboost-thread-dev \ diff --git a/include/dwarfs/python_script.h b/include/dwarfs/python_script.h deleted file mode 100644 index e06db6f1..00000000 --- a/include/dwarfs/python_script.h +++ /dev/null @@ -1,53 +0,0 @@ -/* vim:set ts=2 sw=2 sts=2 et: */ -/** - * \author Marcus Holland-Moritz (github@mhxnet.de) - * \copyright Copyright (c) Marcus Holland-Moritz - * - * This file is part of dwarfs. - * - * dwarfs is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * dwarfs is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with dwarfs. If not, see . - */ - -#pragma once - -#include - -#include "dwarfs/inode.h" -#include "dwarfs/script.h" - -namespace dwarfs { - -class logger; - -class python_script : public script { - public: - python_script(logger& lgr, const std::string& code, const std::string& ctor); - ~python_script(); - - bool has_configure() const override; - bool has_filter() const override; - bool has_transform() const override; - bool has_order() const override; - - void configure(options_interface const& oi) override; - bool filter(entry_interface const& ei) override; - void transform(entry_interface& ei) override; - void order(inode_vector& iv) override; - - private: - class impl; - std::unique_ptr impl_; -}; - -} // namespace dwarfs diff --git a/src/dwarfs/python_script.cpp b/src/dwarfs/python_script.cpp deleted file mode 100644 index 934e1d95..00000000 --- a/src/dwarfs/python_script.cpp +++ /dev/null @@ -1,453 +0,0 @@ -/* vim:set ts=2 sw=2 sts=2 et: */ -/** - * \author Marcus Holland-Moritz (github@mhxnet.de) - * \copyright Copyright (c) Marcus Holland-Moritz - * - * This file is part of dwarfs. - * - * dwarfs is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * dwarfs is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with dwarfs. If not, see . - */ - -#include -#include - -#include -#include - -#include - -#include - -#include "dwarfs/entry.h" -#include "dwarfs/error.h" -#include "dwarfs/inode.h" -#include "dwarfs/logger.h" -#include "dwarfs/options_interface.h" -#include "dwarfs/python_script.h" - -namespace dwarfs { - -namespace py = boost::python; - -namespace { - -std::unordered_set supported_methods{"configure", "filter", - "transform", "order"}; - -void init_python() { - static bool initialized = false; - if (!initialized) { - Py_Initialize(); - initialized = true; - } -} - -bool callable(py::object object) { return 1 == PyCallable_Check(object.ptr()); } - -bool hasattr(py::object obj, const char* name) { - return PyObject_HasAttrString(obj.ptr(), name); -} - -bool has_callable(py::object obj, char const* method) { - return hasattr(obj, method) && callable(obj.attr(method)); -} - -class py_logger { - public: - explicit py_logger(logger& lgr) - : log_(lgr) {} - - void error(std::string const& msg) { LOG_ERROR << "[script] " << msg; } - void warn(std::string const& msg) { LOG_WARN << "[script] " << msg; } - void info(std::string const& msg) { LOG_INFO << "[script] " << msg; } - void debug(std::string const& msg) { LOG_DEBUG << "[script] " << msg; } - void trace(std::string const& msg) { LOG_TRACE << "[script] " << msg; } - - private: - using log_proxy_t = log_proxy; - log_proxy_t log_; -}; - -template -class basic_entry_wrapper { - public: - explicit basic_entry_wrapper(T& entry) - : entry_(&entry) {} - - size_t size() const { return entry_->size(); } - std::string path() const { return entry_->path(); } - std::string name() const { return entry_->name(); } - std::string type() const { return entry_->type_string(); } - - uint16_t permissions() const { return entry_->get_permissions(); } - void set_permissions(uint16_t perm) { entry_->set_permissions(perm); } - uint16_t uid() const { return entry_->get_uid(); } - void set_uid(uint16_t uid) { entry_->set_uid(uid); } - uint16_t gid() const { return entry_->get_gid(); } - void set_gid(uint16_t gid) { entry_->set_gid(gid); } - uint64_t atime() const { return entry_->get_atime(); } - void set_atime(uint64_t atime) { entry_->set_atime(atime); } - uint64_t mtime() const { return entry_->get_mtime(); } - void set_mtime(uint64_t mtime) { entry_->set_mtime(mtime); } - uint64_t ctime() const { return entry_->get_ctime(); } - void set_ctime(uint64_t ctime) { entry_->set_ctime(ctime); } - - private: - T* entry_; -}; - -using entry_wrapper = basic_entry_wrapper; -using mutable_entry_wrapper = basic_entry_wrapper; - -class inode_wrapper { - public: - explicit inode_wrapper(inode const* ino) - : ino_(ino) {} - - size_t similarity_hash() const { return ino_->similarity_hash(); } - size_t refcount() const { return ino_->files().size(); } - py::list paths() const { - py::list ps; - auto& fs = ino_->files(); - for (auto& f : fs) { - ps.append(f->path()); - } - return ps; - } - size_t size() const { return ino_->size(); } - inode const* get() const { return ino_; } - - private: - inode const* ino_; -}; - -} // namespace - -class python_script::impl { - public: - impl(logger& lgr, const std::string& code, const std::string& ctor); - ~impl(); - - void configure(options_interface const& oi); - bool filter(entry_interface const& ei); - void transform(entry_interface& ei); - void order(inode_vector& iv); - - bool has_configure() const { return has_configure_; } - bool has_filter() const { return has_filter_; } - bool has_transform() const { return has_transform_; } - bool has_order() const { return has_order_; } - - private: - void check_instance_methods(py::object obj) const; - void log_py_error() const; - - using log_proxy_t = log_proxy; - using clock = std::chrono::steady_clock; - - class timer { - public: - explicit timer(clock::duration& d) - : start_(clock::now()) - , d_(d) {} - - ~timer() { d_ += clock::now() - start_; } - - private: - clock::time_point start_; - clock::duration& d_; - }; - - log_proxy_t log_; - py_logger pylog_; - bool has_configure_{false}; - bool has_filter_{false}; - bool has_transform_{false}; - bool has_order_{false}; - py::object instance_; - py::object main_module_; - py::object main_namespace_; - clock::duration configure_time_{}; - clock::duration filter_time_{}; - clock::duration transform_time_{}; - clock::duration order_time_{}; -}; - -python_script::impl::impl(logger& lgr, const std::string& code, - const std::string& ctor) - : log_(lgr) - , pylog_(lgr) { - try { - init_python(); - - main_module_ = py::import("__main__"); - main_namespace_ = main_module_.attr("__dict__"); - - py::scope scope(main_module_); - - main_namespace_["dwarfs_logger"] = - py::class_("dwarfs_logger", py::no_init) - .def("error", &py_logger::error) - .def("warn", &py_logger::warn) - .def("info", &py_logger::info) - .def("debug", &py_logger::debug) - .def("trace", &py_logger::trace); - - main_namespace_["file_order_mode"] = - py::enum_("file_order_mode") - .value("none", file_order_mode::NONE) - .value("path", file_order_mode::PATH) - .value("script", file_order_mode::SCRIPT) - .value("similarity", file_order_mode::SIMILARITY) - .value("nilsimsa", file_order_mode::NILSIMSA); - - main_namespace_["set_mode"] = - py::enum_("set_mode") - .value("default", options_interface::DEFAULT) - .value("override", options_interface::OVERRIDE); - - main_namespace_["dwarfs_options"] = - py::class_("dwarfs_options", - py::no_init) - .def("enable_similarity", &options_interface::enable_similarity) - .def("set_order", &options_interface::set_order) - .def("set_remove_empty_dirs", - &options_interface::set_remove_empty_dirs); - - main_namespace_["inode_wrapper"] = - py::class_>( - "inode_wrapper", py::no_init) - .def("similarity_hash", &inode_wrapper::similarity_hash) - .def("refcount", &inode_wrapper::refcount) - .def("paths", &inode_wrapper::paths) - .def("size", &inode_wrapper::size); - - main_namespace_["entry_wrapper"] = - py::class_>( - "entry_wrapper", py::no_init) - .def("name", &entry_wrapper::name) - .def("type", &entry_wrapper::type) - .def("path", &entry_wrapper::path) - .def("size", &entry_wrapper::size) - .def("permissions", &entry_wrapper::permissions) - .def("uid", &entry_wrapper::uid) - .def("gid", &entry_wrapper::gid) - .def("atime", &entry_wrapper::atime) - .def("mtime", &entry_wrapper::mtime) - .def("ctime", &entry_wrapper::ctime); - - main_namespace_["mutable_entry_wrapper"] = - py::class_>( - "mutable_entry_wrapper", py::no_init) - .def("name", &mutable_entry_wrapper::name) - .def("type", &mutable_entry_wrapper::type) - .def("path", &mutable_entry_wrapper::path) - .def("size", &mutable_entry_wrapper::size) - .def("permissions", &mutable_entry_wrapper::permissions) - .def("uid", &mutable_entry_wrapper::uid) - .def("gid", &mutable_entry_wrapper::gid) - .def("atime", &mutable_entry_wrapper::atime) - .def("mtime", &mutable_entry_wrapper::mtime) - .def("ctime", &mutable_entry_wrapper::ctime) - .def("set_permissions", &mutable_entry_wrapper::set_permissions) - .def("set_uid", &mutable_entry_wrapper::set_uid) - .def("set_gid", &mutable_entry_wrapper::set_gid) - .def("set_atime", &mutable_entry_wrapper::set_atime) - .def("set_mtime", &mutable_entry_wrapper::set_mtime) - .def("set_ctime", &mutable_entry_wrapper::set_ctime); - - main_namespace_["logger"] = py::ptr(&pylog_); - - py::exec(code.c_str(), main_namespace_); - - instance_ = py::eval(ctor.c_str(), main_namespace_); - - check_instance_methods(instance_); - - has_configure_ = has_callable(instance_, "configure"); - has_filter_ = has_callable(instance_, "filter"); - has_transform_ = has_callable(instance_, "transform"); - has_order_ = has_callable(instance_, "order"); - } catch (py::error_already_set const&) { - log_py_error(); - DWARFS_THROW(runtime_error, "error initializing script"); - } -} - -void python_script::impl::log_py_error() const { - PyObject *exc, *val, *tb; - PyErr_Fetch(&exc, &val, &tb); - PyErr_NormalizeException(&exc, &val, &tb); - - py::handle<> hexc(exc), hval(py::allow_null(val)), htb(py::allow_null(tb)); - - if (!hval) { - LOG_ERROR << std::string(py::extract(py::str(hexc))); - } else { - py::object traceback(py::import("traceback")); - py::object format_exception(traceback.attr("format_exception")); - py::list formatted_list(format_exception(hexc, hval, htb)); - for (int count = 0; count < len(formatted_list); ++count) { - LOG_ERROR << std::string( - py::extract(formatted_list[count].slice(0, -1))); - } - } -} - -void python_script::impl::check_instance_methods(py::object obj) const { - for (py::stl_input_iterator - it(py::object(py::handle<>(PyObject_Dir(obj.ptr())))), - end; - it != end; ++it) { - if (!it->startswith("_") && callable(obj.attr(*it))) { - std::string name{py::extract(*it)}; - if (supported_methods.find(name) == supported_methods.end()) { - LOG_WARN << "unknown method '" << name << "' found in Python class"; - } - } - } -} - -python_script::impl::~impl() { - std::vector timings; - auto add_timing = [&](bool flag, std::string_view name, auto const& d) { - using floatsec = std::chrono::duration; - if (flag) { - timings.push_back( - fmt::format("{0} {1:.2f}s", name, - std::chrono::duration_cast(d).count())); - } - }; - - add_timing(has_configure_, "configure", configure_time_); - add_timing(has_filter_, "filter", filter_time_); - add_timing(has_transform_, "transform", transform_time_); - add_timing(has_order_, "order", order_time_); - - LOG_INFO << "script time: " << boost::join(timings, ", "); - - // nothing else, really, as boost::python docs forbid using Py_Finalize -} - -void python_script::impl::configure(options_interface const& oi) { - timer tmr(configure_time_); - try { - instance_.attr("configure")(py::ptr(&oi)); - } catch (py::error_already_set const&) { - log_py_error(); - DWARFS_THROW(runtime_error, "error in configure"); - } -} - -bool python_script::impl::filter(entry_interface const& ei) { - timer tmr(filter_time_); - try { - return py::extract( - instance_.attr("filter")(std::make_shared(ei))); - } catch (py::error_already_set const&) { - log_py_error(); - DWARFS_THROW(runtime_error, "error filtering entry"); - } -} - -void python_script::impl::transform(entry_interface& ei) { - timer tmr(transform_time_); - try { - instance_.attr("transform")(std::make_shared(ei)); - } catch (py::error_already_set const&) { - log_py_error(); - DWARFS_THROW(runtime_error, "error transforming entry"); - } -} - -void python_script::impl::order(inode_vector& iv) { - timer tmr(order_time_); - try { - py::list files; - - { - auto td = LOG_TIMED_DEBUG; - - for (size_t i = 0; i < iv.size(); ++i) { - files.append(std::make_shared(iv[i].get())); - } - - td << "prepared files for ordering"; - } - - py::object ordered; - - { - auto td = LOG_TIMED_DEBUG; - ordered = instance_.attr("order")(files); - td << "ordered files in script code"; - } - - folly::F14FastMap priority(iv.size()); - - auto td = LOG_TIMED_DEBUG; - size_t index = 0; - - for (py::stl_input_iterator it(ordered), end; it != end; ++it) { - auto wrapper{py::extract>(*it)()}; - priority[wrapper->get()] = index++; - } - - if (index != iv.size()) { - DWARFS_THROW(runtime_error, "order() returned different number of files"); - } - - std::sort(iv.begin(), iv.end(), - [&](inode_ptr const& a, inode_ptr const& b) { - auto ia = priority.find(a.get()); - auto ib = priority.find(b.get()); - if (ia == priority.end() || ib == priority.end()) { - DWARFS_THROW(runtime_error, - "invalid inode pointer while ordering files"); - } - return ia->second < ib->second; - }); - - td << "applied new inode order"; - } catch (py::error_already_set const&) { - log_py_error(); - DWARFS_THROW(runtime_error, "error ordering inodes"); - } -} - -python_script::python_script(logger& lgr, const std::string& code, - const std::string& ctor) - : impl_(std::make_unique(lgr, code, ctor)) {} - -python_script::~python_script() = default; - -bool python_script::has_configure() const { return impl_->has_configure(); } -bool python_script::has_filter() const { return impl_->has_filter(); } -bool python_script::has_transform() const { return impl_->has_transform(); } -bool python_script::has_order() const { return impl_->has_order(); } - -void python_script::configure(options_interface const& oi) { - impl_->configure(oi); -} - -bool python_script::filter(entry_interface const& ei) { - return impl_->filter(ei); -} - -void python_script::transform(entry_interface& ei) { impl_->transform(ei); } - -void python_script::order(inode_vector& iv) { impl_->order(iv); } - -} // namespace dwarfs diff --git a/src/mkdwarfs_main.cpp b/src/mkdwarfs_main.cpp index f89ab01a..30615f1d 100644 --- a/src/mkdwarfs_main.cpp +++ b/src/mkdwarfs_main.cpp @@ -73,10 +73,6 @@ #include "dwarfs/util.h" #include "dwarfs_tool_main.h" -#ifdef DWARFS_HAVE_PYTHON -#include "dwarfs/python_script.h" -#endif - namespace po = boost::program_options; namespace dwarfs { @@ -96,9 +92,6 @@ enum class debug_filter_mode { const std::map order_choices{ {"none", file_order_mode::NONE}, {"path", file_order_mode::PATH}, -#ifdef DWARFS_HAVE_PYTHON - {"script", file_order_mode::SCRIPT}, -#endif {"similarity", file_order_mode::SIMILARITY}, {"nilsimsa", file_order_mode::NILSIMSA}, }; @@ -455,11 +448,6 @@ int mkdwarfs_main(int argc, sys_char** argv) { ("max-similarity-size", po::value(&max_similarity_size), "maximum file size to compute similarity") -#ifdef DWARFS_HAVE_PYTHON - ("script", - po::value(&script_arg), - "Python script for customization") -#endif ("file-hash", po::value(&file_hash_algo)->default_value("xxh3-128"), file_hash_desc.c_str()) @@ -875,36 +863,7 @@ int mkdwarfs_main(int argc, sys_char** argv) { std::shared_ptr