text: Properly handle surrogate pairs in text on Windows

Fixes #1629
This commit is contained in:
rdb 2024-04-08 12:12:00 +02:00
parent 2adc167f26
commit 5da013e2e9
3 changed files with 90 additions and 25 deletions

View File

@ -168,7 +168,7 @@ get_num_characters() const {
* string. If the object at this position is a graphic object instead of a * string. If the object at this position is a graphic object instead of a
* character, returns 0. * character, returns 0.
*/ */
INLINE wchar_t TextAssembler:: INLINE char32_t TextAssembler::
get_character(int n) const { get_character(int n) const {
nassertr(n >= 0 && n < (int)_text_string.size(), 0); nassertr(n >= 0 && n < (int)_text_string.size(), 0);
return _text_string[n]._character; return _text_string[n]._character;
@ -232,7 +232,7 @@ get_num_cols(int r) const {
* the object at this position is a graphic object instead of a character, * the object at this position is a graphic object instead of a character,
* returns 0. * returns 0.
*/ */
INLINE wchar_t TextAssembler:: INLINE char32_t TextAssembler::
get_character(int r, int c) const { get_character(int r, int c) const {
nassertr(r >= 0 && r < (int)_text_block.size(), 0); nassertr(r >= 0 && r < (int)_text_block.size(), 0);
nassertr(c >= 0 && c < (int)_text_block[r]._string.size(), 0); nassertr(c >= 0 && c < (int)_text_block[r]._string.size(), 0);
@ -315,6 +315,18 @@ TextCharacter(wchar_t character,
{ {
} }
/**
 * Constructs a TextCharacter that represents a single character, given as a
 * char32_t value.  The character is stored in _character, _graphic is set to
 * nullptr (so the entry is not treated as an embedded graphic), and cprops is
 * stored as the computed properties associated with this character.
 */
INLINE TextAssembler::TextCharacter::
TextCharacter(char32_t character,
TextAssembler::ComputedProperties *cprops) :
_character(character),
_graphic(nullptr),
_cprops(cprops)
{
}
/** /**
* *
*/ */

View File

@ -235,11 +235,16 @@ wstring TextAssembler::
get_plain_wtext() const { get_plain_wtext() const {
wstring wtext; wstring wtext;
TextString::const_iterator si; for (const TextCharacter &tch : _text_string) {
for (si = _text_string.begin(); si != _text_string.end(); ++si) {
const TextCharacter &tch = (*si);
if (tch._graphic == nullptr) { if (tch._graphic == nullptr) {
wtext += tch._character; if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else { } else {
wtext.push_back(0); wtext.push_back(0);
} }
@ -269,11 +274,16 @@ get_wordwrapped_plain_wtext() const {
wtext += '\n'; wtext += '\n';
} }
TextString::const_iterator si; for (const TextCharacter &tch : row._string) {
for (si = row._string.begin(); si != row._string.end(); ++si) {
const TextCharacter &tch = (*si);
if (tch._graphic == nullptr) { if (tch._graphic == nullptr) {
wtext += tch._character; if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else { } else {
wtext.push_back(0); wtext.push_back(0);
} }
@ -295,12 +305,17 @@ get_wtext() const {
wstring wtext; wstring wtext;
PT(ComputedProperties) current_cprops = _initial_cprops; PT(ComputedProperties) current_cprops = _initial_cprops;
TextString::const_iterator si; for (const TextCharacter &tch : _text_string) {
for (si = _text_string.begin(); si != _text_string.end(); ++si) {
const TextCharacter &tch = (*si);
current_cprops->append_delta(wtext, tch._cprops); current_cprops->append_delta(wtext, tch._cprops);
if (tch._graphic == nullptr) { if (tch._graphic == nullptr) {
wtext += tch._character; if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else { } else {
wtext.push_back(text_embed_graphic_key); wtext.push_back(text_embed_graphic_key);
wtext += tch._graphic_wname; wtext += tch._graphic_wname;
@ -341,12 +356,17 @@ get_wordwrapped_wtext() const {
wtext += '\n'; wtext += '\n';
} }
TextString::const_iterator si; for (const TextCharacter &tch : row._string) {
for (si = row._string.begin(); si != row._string.end(); ++si) {
const TextCharacter &tch = (*si);
current_cprops->append_delta(wtext, tch._cprops); current_cprops->append_delta(wtext, tch._cprops);
if (tch._graphic == nullptr) { if (tch._graphic == nullptr) {
wtext += tch._character; if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else { } else {
wtext.push_back(text_embed_graphic_key); wtext.push_back(text_embed_graphic_key);
wtext += tch._graphic_wname; wtext += tch._graphic_wname;
@ -623,6 +643,18 @@ assemble_text() {
*/ */
PN_stdfloat TextAssembler:: PN_stdfloat TextAssembler::
calc_width(wchar_t character, const TextProperties &properties) { calc_width(wchar_t character, const TextProperties &properties) {
return calc_width((char32_t)character, properties);
}
/**
* Returns the width of a single character, according to its associated font.
* This also correctly calculates the width of cheesy ligatures and accented
* characters, which may not exist in the font as such.
*
* This does not take kerning into account, however.
*/
PN_stdfloat TextAssembler::
calc_width(char32_t character, const TextProperties &properties) {
if (character == ' ') { if (character == ' ') {
// A space is a special case. // A space is a special case.
TextFont *font = properties.get_font(); TextFont *font = properties.get_font();
@ -846,6 +878,27 @@ scan_wtext(TextAssembler::TextString &output_string,
<< "Unknown TextGraphic: " << graphic_name << "\n"; << "Unknown TextGraphic: " << graphic_name << "\n";
} }
#if WCHAR_MAX < 0x10FFFF
} else if (*si >= 0xd800 && *si < 0xdc00) {
// This is a high surrogate. Look for a subsequent low surrogate.
wchar_t ch = *si;
++si;
if (si == send) {
text_cat.warning()
<< "High surrogate at end of text.\n";
return;
}
wchar_t ch2 = *si;
if (ch2 >= 0xdc00 && ch2 < 0xe000) {
char32_t code_point = 0x10000 + ((ch - 0xd800) << 10) + (ch2 - 0xdc00);
output_string.push_back(TextCharacter(code_point, current_cprops));
++si;
} else {
text_cat.warning()
<< "High surrogate was not followed by low surrogate in text.\n";
}
#endif
} else { } else {
// A normal character. Apply it. // A normal character. Apply it.
output_string.push_back(TextCharacter(*si, current_cprops)); output_string.push_back(TextCharacter(*si, current_cprops));
@ -1422,10 +1475,8 @@ assemble_row(TextAssembler::TextRow &row,
hb_buffer_t *harfbuff = nullptr; hb_buffer_t *harfbuff = nullptr;
#endif #endif
TextString::const_iterator si; for (const TextCharacter &tch : row._string) {
for (si = row._string.begin(); si != row._string.end(); ++si) { char32_t character = tch._character;
const TextCharacter &tch = (*si);
wchar_t character = tch._character;
const TextGraphic *graphic = tch._graphic; const TextGraphic *graphic = tch._graphic;
const TextProperties *properties = &(tch._cprops->_properties); const TextProperties *properties = &(tch._cprops->_properties);

View File

@ -78,14 +78,14 @@ PUBLISHED:
int calc_index(int r, int c) const; int calc_index(int r, int c) const;
INLINE int get_num_characters() const; INLINE int get_num_characters() const;
INLINE wchar_t get_character(int n) const; INLINE char32_t get_character(int n) const;
INLINE const TextGraphic *get_graphic(int n) const; INLINE const TextGraphic *get_graphic(int n) const;
INLINE const TextProperties &get_properties(int n) const; INLINE const TextProperties &get_properties(int n) const;
INLINE PN_stdfloat get_width(int n) const; INLINE PN_stdfloat get_width(int n) const;
INLINE int get_num_rows() const; INLINE int get_num_rows() const;
INLINE int get_num_cols(int r) const; INLINE int get_num_cols(int r) const;
INLINE wchar_t get_character(int r, int c) const; INLINE char32_t get_character(int r, int c) const;
INLINE const TextGraphic *get_graphic(int r, int c) const; INLINE const TextGraphic *get_graphic(int r, int c) const;
INLINE const TextProperties &get_properties(int r, int c) const; INLINE const TextProperties &get_properties(int r, int c) const;
INLINE PN_stdfloat get_width(int r, int c) const; INLINE PN_stdfloat get_width(int r, int c) const;
@ -98,6 +98,7 @@ PUBLISHED:
INLINE const LVector2 &get_lr() const; INLINE const LVector2 &get_lr() const;
static PN_stdfloat calc_width(wchar_t character, const TextProperties &properties); static PN_stdfloat calc_width(wchar_t character, const TextProperties &properties);
static PN_stdfloat calc_width(char32_t character, const TextProperties &properties);
static PN_stdfloat calc_width(const TextGraphic *graphic, const TextProperties &properties); static PN_stdfloat calc_width(const TextGraphic *graphic, const TextProperties &properties);
static bool has_exact_character(wchar_t character, const TextProperties &properties); static bool has_exact_character(wchar_t character, const TextProperties &properties);
@ -132,13 +133,14 @@ private:
class TextCharacter { class TextCharacter {
public: public:
INLINE TextCharacter(wchar_t character, ComputedProperties *cprops); INLINE TextCharacter(wchar_t character, ComputedProperties *cprops);
INLINE TextCharacter(char32_t character, ComputedProperties *cprops);
INLINE TextCharacter(const TextGraphic *graphic, INLINE TextCharacter(const TextGraphic *graphic,
const std::wstring &graphic_wname, const std::wstring &graphic_wname,
ComputedProperties *cprops); ComputedProperties *cprops);
INLINE TextCharacter(const TextCharacter &copy); INLINE TextCharacter(const TextCharacter &copy);
INLINE void operator = (const TextCharacter &copy); INLINE void operator = (const TextCharacter &copy);
wchar_t _character; char32_t _character;
const TextGraphic *_graphic; const TextGraphic *_graphic;
std::wstring _graphic_wname; std::wstring _graphic_wname;
PT(ComputedProperties) _cprops; PT(ComputedProperties) _cprops;