From 89c895f6bf5044848a86456ca2126bfeab708e23 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 9 Apr 2025 08:15:12 +0200 Subject: [PATCH] feat(mkdwarfs): add `--memory-limit=auto` --- cmake/libdwarfs_tool.cmake | 1 + doc/mkdwarfs.md | 6 ++- tools/include/dwarfs/tool/sysinfo.h | 40 +++++++++++++++++ tools/src/mkdwarfs_main.cpp | 40 +++++++++++++---- tools/src/tool/sysinfo.cpp | 66 +++++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 11 deletions(-) create mode 100644 tools/include/dwarfs/tool/sysinfo.h create mode 100644 tools/src/tool/sysinfo.cpp diff --git a/cmake/libdwarfs_tool.cmake b/cmake/libdwarfs_tool.cmake index a4362166..4d56d452 100644 --- a/cmake/libdwarfs_tool.cmake +++ b/cmake/libdwarfs_tool.cmake @@ -25,6 +25,7 @@ add_library( tools/src/tool/main_adapter.cpp tools/src/tool/safe_main.cpp tools/src/tool/sys_char.cpp + tools/src/tool/sysinfo.cpp tools/src/tool/tool.cpp ) diff --git a/doc/mkdwarfs.md b/doc/mkdwarfs.md index 29ff9752..104c1ccb 100644 --- a/doc/mkdwarfs.md +++ b/doc/mkdwarfs.md @@ -171,7 +171,7 @@ Most other options are concerned with compression tuning: be able to see some improvement. If your system is tight on memory, then decreasing this will potentially save a few MiBs. -- `-L`, `--memory-limit=`*value*: +- `-L`, `--memory-limit=auto|`*value*: Approximately how much memory you want `mkdwarfs` to use during filesystem creation. Note that currently this will only affect the block manager component, i.e. the number of filesystem blocks that are in flight but @@ -180,7 +180,9 @@ Most other options are concerned with compression tuning: good option when building large filesystems with expensive compression algorithms. Also note that most memory is likely used by the compression algorithms, so if you're short on memory it might be worth tweaking the - compression options. + compression options. 
The default `auto` mode will take into account the + number of workers, the block size, and the amount of system memory to try + to compute a reasonable limit. - `-C`, `--compression=`[*category*`::`]*algorithm*[`:`*algopt*[`=`*value*][`:`...]]: The compression algorithm and configuration used for file system data. diff --git a/tools/include/dwarfs/tool/sysinfo.h b/tools/include/dwarfs/tool/sysinfo.h new file mode 100644 index 00000000..f4d2c525 --- /dev/null +++ b/tools/include/dwarfs/tool/sysinfo.h @@ -0,0 +1,40 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the “Software”), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include + +namespace dwarfs::tool { + +class sysinfo { + public: + static uint64_t get_total_memory(); +}; + +} // namespace dwarfs::tool diff --git a/tools/src/mkdwarfs_main.cpp b/tools/src/mkdwarfs_main.cpp index 5078fe73..e7e375ec 100644 --- a/tools/src/mkdwarfs_main.cpp +++ b/tools/src/mkdwarfs_main.cpp @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -383,6 +384,19 @@ void validate(boost::any& v, std::vector const& values, v = categorize_optval{po::validators::get_single_string(values), true}; } +uint64_t +compute_memory_limit(uint64_t const block_size, uint64_t const num_cpu) { + auto const sys_mem = + std::max(tool::sysinfo::get_total_memory(), UINT64_C(256) << 20); + auto wanted_mem = num_cpu * block_size; + if (wanted_mem < sys_mem / 32) { + wanted_mem *= 2; + } else { + wanted_mem += std::min(num_cpu, UINT64_C(8)) * block_size; + } + return std::min(wanted_mem, sys_mem / 8); +} + } // namespace int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { @@ -507,7 +521,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { ->value_name(dep_def_val("num-workers")), "number of segmenter worker threads") ("memory-limit,L", - po::value(&memory_limit)->default_value("1g"), + po::value(&memory_limit)->default_value("auto"), "block manager memory limit") ("recompress", po::value(&recompress_opts)->implicit_value("all"), @@ -909,8 +923,6 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { } } - size_t mem_limit = parse_size_with_unit(memory_limit); - if (!vm.contains("num-scanner-workers")) { num_scanner_workers = num_workers; } @@ -1087,12 +1099,6 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { ? 
2000ms : 200ms; - writer::filesystem_writer_options fswopts; - fswopts.max_queue_size = mem_limit; - fswopts.worst_case_block_size = UINT64_C(1) << sf_config.block_size_bits; - fswopts.remove_header = remove_header; - fswopts.no_section_index = no_section_index; - std::unique_ptr header_ifs; if (!header_str.empty()) { @@ -1123,6 +1129,16 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { // No more streaming to iol.err after this point as this would // cause a race with the progress thread. + size_t mem_limit = 0; + + if (memory_limit == "auto") { + mem_limit = compute_memory_limit(UINT64_C(1) << sf_config.block_size_bits, + num_workers); + LOG_VERBOSE << "using memory limit of " << size_with_unit(mem_limit); + } else { + mem_limit = parse_size_with_unit(memory_limit); + } + auto min_memory_req = num_workers * (UINT64_C(1) << sf_config.block_size_bits); @@ -1294,6 +1310,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { std::numeric_limits::max(), compress_niceness); + writer::filesystem_writer_options fswopts; + fswopts.max_queue_size = mem_limit; + fswopts.worst_case_block_size = UINT64_C(1) << sf_config.block_size_bits; + fswopts.remove_header = remove_header; + fswopts.no_section_index = no_section_index; + std::optional fsw; try { diff --git a/tools/src/tool/sysinfo.cpp b/tools/src/tool/sysinfo.cpp new file mode 100644 index 00000000..15e0bb42 --- /dev/null +++ b/tools/src/tool/sysinfo.cpp @@ -0,0 +1,66 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the “Software”), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#if defined(_WIN32) +#include <windows.h> +#elif defined(__APPLE__) +#include <sys/sysctl.h> +#include <sys/types.h> +#else +#include <unistd.h> +#endif + +#include <dwarfs/tool/sysinfo.h> + +namespace dwarfs::tool { + +uint64_t sysinfo::get_total_memory() { +#if defined(_WIN32) + MEMORYSTATUSEX statex; + statex.dwLength = sizeof(statex); + if (::GlobalMemoryStatusEx(&statex)) { + return statex.ullTotalPhys; + } +#elif defined(__APPLE__) + int mib[2] = {CTL_HW, HW_MEMSIZE}; + int64_t memSize = 0; + size_t length = sizeof(memSize); + if (::sysctl(mib, 2, &memSize, &length, nullptr, 0) == 0) { + return static_cast<uint64_t>(memSize); + } +#else + auto pages = ::sysconf(_SC_PHYS_PAGES); + auto page_size = ::sysconf(_SC_PAGE_SIZE); + if (pages != -1 && page_size != -1) { + return static_cast<uint64_t>(pages) * static_cast<uint64_t>(page_size); + } +#endif + return 0; +} + +} // namespace dwarfs::tool