feat(mkdwarfs): add --memory-limit=auto

This commit is contained in:
Marcus Holland-Moritz 2025-04-09 08:15:12 +02:00
parent 084176d5fd
commit 89c895f6bf
5 changed files with 142 additions and 11 deletions

View File

@ -25,6 +25,7 @@ add_library(
tools/src/tool/main_adapter.cpp
tools/src/tool/safe_main.cpp
tools/src/tool/sys_char.cpp
tools/src/tool/sysinfo.cpp
tools/src/tool/tool.cpp
)

View File

@ -171,7 +171,7 @@ Most other options are concerned with compression tuning:
be able to see some improvement. If your system is tight on memory, then
decreasing this will potentially save a few MiBs.
- `-L`, `--memory-limit=`*value*:
- `-L`, `--memory-limit=auto|`*value*:
Approximately how much memory you want `mkdwarfs` to use during filesystem
creation. Note that currently this will only affect the block manager
component, i.e. the number of filesystem blocks that are in flight but
@ -180,7 +180,9 @@ Most other options are concerned with compression tuning:
good option when building large filesystems with expensive compression
algorithms. Also note that most memory is likely used by the compression
algorithms, so if you're short on memory it might be worth tweaking the
compression options.
compression options. The default `auto` mode will take into account the
number of workers, the block size, and the amount of system memory to try
to compute a reasonable limit.
- `-C`, `--compression=`[*category*`::`]*algorithm*[`:`*algopt*[`=`*value*][`:`...]]:
The compression algorithm and configuration used for file system data.

View File

@ -0,0 +1,40 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the Software), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <cstdint>
namespace dwarfs::tool {
/**
 * Groups static queries about the host system.
 *
 * Currently the only query is the total amount of system memory.
 */
class sysinfo {
public:
/**
 * Query the total amount of physical memory of the host.
 *
 * \return The memory size in bytes as reported by the operating system,
 *         or 0 if the size could not be determined.
 */
static uint64_t get_total_memory();
};
} // namespace dwarfs::tool

View File

@ -77,6 +77,7 @@
#include <dwarfs/thread_pool.h>
#include <dwarfs/tool/iolayer.h>
#include <dwarfs/tool/program_options_helpers.h>
#include <dwarfs/tool/sysinfo.h>
#include <dwarfs/tool/tool.h>
#include <dwarfs/util.h>
#include <dwarfs/utility/rewrite_filesystem.h>
@ -383,6 +384,19 @@ void validate(boost::any& v, std::vector<std::string> const& values,
v = categorize_optval{po::validators::get_single_string(values), true};
}
/**
 * Compute the block manager memory limit for `--memory-limit=auto`.
 *
 * The starting point is one block per worker (`num_cpu * block_size`).
 * Headroom is then added on top of that:
 *
 *  - if the starting point is below 1/32 of the system memory, it is doubled;
 *  - otherwise, at most 8 additional blocks are added.
 *
 * The result never exceeds 1/8 of the system memory. If the system reports
 * less than 256 MiB (or the amount is unknown, i.e. reported as 0), 256 MiB
 * is assumed instead.
 *
 * \param block_size  Size of a single filesystem block in bytes.
 * \param num_cpu     Number of workers to account for.
 * \return The memory limit in bytes.
 */
uint64_t
compute_memory_limit(uint64_t const block_size, uint64_t const num_cpu) {
  // Lower bound for the assumed system memory; also covers the case where
  // the amount of system memory could not be determined.
  uint64_t const assumed_min_mem = UINT64_C(256) << 20;
  uint64_t const total_mem =
      std::max<uint64_t>(tool::sysinfo::get_total_memory(), assumed_min_mem);

  // One block in flight per worker.
  uint64_t const per_worker_mem = num_cpu * block_size;

  // Plenty of memory available: allow a second block per worker.
  // Otherwise: be more conservative and add at most 8 extra blocks.
  uint64_t const headroom =
      per_worker_mem < total_mem / 32
          ? per_worker_mem
          : std::min(num_cpu, UINT64_C(8)) * block_size;

  // Never claim more than an eighth of the system memory.
  return std::min(per_worker_mem + headroom, total_mem / 8);
}
} // namespace
int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
@ -507,7 +521,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
->value_name(dep_def_val("num-workers")),
"number of segmenter worker threads")
("memory-limit,L",
po::value<std::string>(&memory_limit)->default_value("1g"),
po::value<std::string>(&memory_limit)->default_value("auto"),
"block manager memory limit")
("recompress",
po::value<std::string>(&recompress_opts)->implicit_value("all"),
@ -909,8 +923,6 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
}
}
size_t mem_limit = parse_size_with_unit(memory_limit);
if (!vm.contains("num-scanner-workers")) {
num_scanner_workers = num_workers;
}
@ -1087,12 +1099,6 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
? 2000ms
: 200ms;
writer::filesystem_writer_options fswopts;
fswopts.max_queue_size = mem_limit;
fswopts.worst_case_block_size = UINT64_C(1) << sf_config.block_size_bits;
fswopts.remove_header = remove_header;
fswopts.no_section_index = no_section_index;
std::unique_ptr<input_stream> header_ifs;
if (!header_str.empty()) {
@ -1123,6 +1129,16 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
// No more streaming to iol.err after this point as this would
// cause a race with the progress thread.
size_t mem_limit = 0;
if (memory_limit == "auto") {
mem_limit = compute_memory_limit(UINT64_C(1) << sf_config.block_size_bits,
num_workers);
LOG_VERBOSE << "using memory limit of " << size_with_unit(mem_limit);
} else {
mem_limit = parse_size_with_unit(memory_limit);
}
auto min_memory_req =
num_workers * (UINT64_C(1) << sf_config.block_size_bits);
@ -1294,6 +1310,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
std::numeric_limits<size_t>::max(),
compress_niceness);
writer::filesystem_writer_options fswopts;
fswopts.max_queue_size = mem_limit;
fswopts.worst_case_block_size = UINT64_C(1) << sf_config.block_size_bits;
fswopts.remove_header = remove_header;
fswopts.no_section_index = no_section_index;
std::optional<writer::filesystem_writer> fsw;
try {

View File

@ -0,0 +1,66 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the Software), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* SPDX-License-Identifier: MIT
*/
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#include <sys/types.h>
#else
#include <unistd.h>
#endif
#include <dwarfs/tool/sysinfo.h>
namespace dwarfs::tool {
/**
 * Query the total amount of physical memory from the operating system.
 *
 * Uses `GlobalMemoryStatusEx` on Windows, the `HW_MEMSIZE` sysctl on macOS
 * and `sysconf` (physical page count times page size) everywhere else.
 *
 * \return The memory size in bytes, or 0 if the platform query failed.
 */
uint64_t sysinfo::get_total_memory() {
  // Stays 0 unless the platform-specific query below succeeds.
  uint64_t total_bytes = 0;

#if defined(_WIN32)
  MEMORYSTATUSEX mem_status;
  // The API requires the structure size to be set before the call.
  mem_status.dwLength = sizeof(mem_status);
  if (::GlobalMemoryStatusEx(&mem_status)) {
    total_bytes = mem_status.ullTotalPhys;
  }
#elif defined(__APPLE__)
  int name[2] = {CTL_HW, HW_MEMSIZE};
  int64_t value = 0;
  size_t value_size = sizeof(value);
  if (::sysctl(name, 2, &value, &value_size, nullptr, 0) == 0) {
    total_bytes = static_cast<uint64_t>(value);
  }
#else
  auto const num_pages = ::sysconf(_SC_PHYS_PAGES);
  auto const bytes_per_page = ::sysconf(_SC_PAGE_SIZE);
  if (num_pages != -1 && bytes_per_page != -1) {
    // Widen both factors before multiplying so the product cannot be
    // truncated to the width of `long`.
    total_bytes = static_cast<uint64_t>(num_pages) *
                  static_cast<uint64_t>(bytes_per_page);
  }
#endif

  return total_bytes;
}
} // namespace dwarfs::tool