Fix interaction between debug presentation, precision, and width for strings (#4478)

This commit is contained in:
Victor Chernyakin 2025-07-04 07:33:47 -07:00 committed by GitHub
parent fc8d07cfe5
commit 2fa3e1a1bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 114 additions and 51 deletions

View File

@ -659,21 +659,9 @@ FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) {
} while (buf_ptr < buf + num_chars_left);
}
template <typename Char>
inline auto compute_width(basic_string_view<Char> s) -> size_t {
return s.size();
}
// Computes approximate display width of a UTF-8 string.
FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t {
size_t num_code_points = 0;
// It is not a lambda for compatibility with C++14.
struct count_code_points {
size_t* count;
FMT_CONSTEXPR auto operator()(uint32_t cp, string_view) const -> bool {
*count += to_unsigned(
1 +
(cp >= 0x1100 &&
FMT_CONSTEXPR inline auto display_width_of(uint32_t cp) noexcept -> size_t {
return to_unsigned(
1 + (cp >= 0x1100 &&
(cp <= 0x115f || // Hangul Jamo init. consonants
cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET
cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET
@ -691,12 +679,6 @@ FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t {
(cp >= 0x1f300 && cp <= 0x1f64f) ||
// Supplemental Symbols and Pictographs:
(cp >= 0x1f900 && cp <= 0x1f9ff))));
return true;
}
};
// We could avoid branches by using utf8_decode directly.
for_each_codepoint(s, count_code_points{&num_code_points});
return num_code_points;
}
template <typename T> struct is_integral : std::is_integral<T> {};
@ -2130,35 +2112,98 @@ FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value,
return write_int<Char>(out, make_write_int_arg(value, specs.sign()), specs);
}
inline auto convert_precision_to_size(string_view s, size_t precision)
-> size_t {
size_t display_width = 0;
size_t result = s.size();
for_each_codepoint(s, [&](uint32_t, string_view sv) {
display_width += compute_width(sv);
// Stop when display width exceeds precision.
if (display_width > precision) {
result = to_unsigned(sv.begin() - s.begin());
template <typename Char, typename OutputIt,
FMT_ENABLE_IF(std::is_same<Char, char>::value)>
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
const format_specs& specs) -> OutputIt {
bool is_debug = specs.type() == presentation_type::debug;
if (specs.precision < 0 && specs.width == 0) {
auto&& it = reserve(out, s.size());
return is_debug ? write_escaped_string(it, s) : copy<char>(s, it);
}
size_t display_width_limit =
specs.precision < 0 ? SIZE_MAX : to_unsigned(specs.precision);
size_t display_width =
!is_debug || specs.precision == 0 ? 0 : 1; // Account for opening "
size_t size = !is_debug || specs.precision == 0 ? 0 : 1;
for_each_codepoint(s, [&](uint32_t cp, string_view sv) {
if (is_debug && needs_escape(cp)) {
counting_buffer<char> buf;
write_escaped_cp(basic_appender<char>(buf),
find_escape_result<char>{sv.begin(), sv.end(), cp});
// We're reinterpreting bytes as display width. That's okay
// because write_escaped_cp() only writes ASCII characters.
size_t cp_width = buf.count();
if (display_width + cp_width <= display_width_limit) {
display_width += cp_width;
size += cp_width;
// If this is the end of the string, account for closing "
if (display_width < display_width_limit && sv.end() == s.end()) {
++display_width;
++size;
}
return true;
}
size += display_width_limit - display_width;
display_width = display_width_limit;
return false;
}
return true;
size_t cp_width = display_width_of(cp);
if (cp_width + display_width <= display_width_limit) {
display_width += cp_width;
size += sv.size();
// If this is the end of the string, account for closing "
if (is_debug && display_width < display_width_limit &&
sv.end() == s.end()) {
++display_width;
++size;
}
return true;
}
return false;
});
return result;
struct bounded_output_iterator {
reserve_iterator<OutputIt> underlying_iterator;
size_t bound;
FMT_CONSTEXPR auto operator*() -> bounded_output_iterator& { return *this; }
FMT_CONSTEXPR auto operator++() -> bounded_output_iterator& {
return *this;
}
FMT_CONSTEXPR auto operator++(int) -> bounded_output_iterator& {
return *this;
}
FMT_CONSTEXPR auto operator=(char c) -> bounded_output_iterator& {
if (bound > 0) {
*underlying_iterator++ = c;
--bound;
}
return *this;
}
};
return write_padded<char>(
out, specs, size, display_width, [&](reserve_iterator<OutputIt> it) {
return is_debug
? write_escaped_string(bounded_output_iterator{it, size}, s)
.underlying_iterator
: copy<char>(s.data(), s.data() + size, it);
});
}
template <typename Char, FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
auto convert_precision_to_size(basic_string_view<Char>, size_t precision)
-> size_t {
return precision;
}
template <typename Char, typename OutputIt>
template <typename Char, typename OutputIt,
FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
const format_specs& specs) -> OutputIt {
auto data = s.data();
auto size = s.size();
if (specs.precision >= 0 && to_unsigned(specs.precision) < size)
size = convert_precision_to_size(s, to_unsigned(specs.precision));
size = to_unsigned(specs.precision);
bool is_debug = specs.type() == presentation_type::debug;
if (is_debug) {
@ -2167,22 +2212,19 @@ FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
size = buf.count();
}
size_t width = 0;
if (specs.width != 0) {
width =
is_debug ? size : compute_width(basic_string_view<Char>(data, size));
}
return write_padded<Char>(
out, specs, size, width, [=](reserve_iterator<OutputIt> it) {
out, specs, size, [&](reserve_iterator<OutputIt> it) {
return is_debug ? write_escaped_string(it, s)
: copy<Char>(data, data + size, it);
});
}
template <typename Char, typename OutputIt>
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
const format_specs& specs, locale_ref) -> OutputIt {
return write<Char>(out, s, specs);
}
template <typename Char, typename OutputIt>
FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs,
locale_ref) -> OutputIt {

View File

@ -206,10 +206,6 @@ TEST(util_test, parse_nonnegative_int) {
EXPECT_EQ(fmt::detail::parse_nonnegative_int(begin, end, -1), -1);
}
TEST(format_impl_test, compute_width) {
EXPECT_EQ(fmt::detail::compute_width("вожык"), 5);
}
TEST(util_test, utf8_to_utf16) {
auto u = fmt::detail::utf8_to_utf16("лошадка");
EXPECT_EQ(L"\x043B\x043E\x0448\x0430\x0434\x043A\x0430", u.str());
@ -887,6 +883,7 @@ TEST(format_test, width) {
" 0xcafe");
EXPECT_EQ(fmt::format("{:11}", 'x'), "x ");
EXPECT_EQ(fmt::format("{:12}", "str"), "str ");
EXPECT_EQ(fmt::format("{:*^5}", "🤡"), "*🤡**");
EXPECT_EQ(fmt::format("{:*^6}", "🤡"), "**🤡**");
EXPECT_EQ(fmt::format("{:*^8}", "你好"), "**你好**");
EXPECT_EQ(fmt::format("{:#6}", 42.0), " 42.");
@ -894,6 +891,31 @@ TEST(format_test, width) {
EXPECT_EQ(fmt::format("{:>06.0f}", 0.00884311), " 0");
}
TEST(format_test, debug_presentation) {
EXPECT_EQ(fmt::format("{:?}", ""), R"("")");
EXPECT_EQ(fmt::format("{:*<5.0?}", "\n"), R"(*****)");
EXPECT_EQ(fmt::format("{:*<5.1?}", "\n"), R"("****)");
EXPECT_EQ(fmt::format("{:*<5.2?}", "\n"), R"("\***)");
EXPECT_EQ(fmt::format("{:*<5.3?}", "\n"), R"("\n**)");
EXPECT_EQ(fmt::format("{:*<5.4?}", "\n"), R"("\n"*)");
EXPECT_EQ(fmt::format("{:*<5.1?}", "Σ"), R"("****)");
EXPECT_EQ(fmt::format("{:*<5.2?}", "Σ"), R"("Σ***)");
EXPECT_EQ(fmt::format("{:*<5.3?}", "Σ"), R"("Σ"**)");
EXPECT_EQ(fmt::format("{:*<5.1?}", ""), R"("****)");
EXPECT_EQ(fmt::format("{:*<5.2?}", ""), R"("****)");
EXPECT_EQ(fmt::format("{:*<5.3?}", ""), R"("**)");
EXPECT_EQ(fmt::format("{:*<5.4?}", ""), R"(""*)");
EXPECT_EQ(fmt::format("{:*<8?}", "туда"), R"("туда"**)");
EXPECT_EQ(fmt::format("{:*>8?}", "сюда"), R"(**"сюда")");
EXPECT_EQ(fmt::format("{:*^8?}", "中心"), R"(*""*)");
EXPECT_EQ(fmt::format("{:*^14?}", "A\t👈🤯ы猫"), R"(*"A\t👈🤯ы"*)");
}
auto bad_dynamic_spec_msg = FMT_BUILTIN_TYPES
? "width/precision is out of range"
: "width/precision is not integer";
@ -1134,7 +1156,6 @@ TEST(format_test, large_precision) {
TEST(format_test, utf8_precision) {
auto result = fmt::format("{:.4}", "caf\u00e9s"); // cafés
EXPECT_EQ(fmt::detail::compute_width(result), 4);
EXPECT_EQ(result, "caf\u00e9");
}