text: Properly handle surrogate pairs in text on Windows

Fixes #1629
This commit is contained in:
rdb 2024-04-08 12:12:00 +02:00
parent 2adc167f26
commit 5da013e2e9
3 changed files with 90 additions and 25 deletions

View File

@ -168,7 +168,7 @@ get_num_characters() const {
* string. If the object at this position is a graphic object instead of a
* character, returns 0.
*/
INLINE wchar_t TextAssembler::
INLINE char32_t TextAssembler::
get_character(int n) const {
nassertr(n >= 0 && n < (int)_text_string.size(), 0);
return _text_string[n]._character;
@ -232,7 +232,7 @@ get_num_cols(int r) const {
* the object at this position is a graphic object instead of a character,
* returns 0.
*/
INLINE wchar_t TextAssembler::
INLINE char32_t TextAssembler::
get_character(int r, int c) const {
nassertr(r >= 0 && r < (int)_text_block.size(), 0);
nassertr(c >= 0 && c < (int)_text_block[r]._string.size(), 0);
@ -315,6 +315,18 @@ TextCharacter(wchar_t character,
{
}
/**
* Constructs a TextCharacter that represents an ordinary character rather
* than an embedded graphic: _graphic is left null, which is what the text
* getters test to tell the two apart.
*
* The character is taken as a char32_t so that a code point outside the Basic
* Multilingual Plane can be stored whole even where wchar_t is too narrow to
* hold it; scan_wtext uses this overload for the code point it decodes from a
* surrogate pair.
*
* cprops is stored in _cprops as the computed properties for this character.
*/
INLINE TextAssembler::TextCharacter::
TextCharacter(char32_t character,
TextAssembler::ComputedProperties *cprops) :
_character(character),
_graphic(nullptr),
_cprops(cprops)
{
}
/**
*
*/

View File

@ -235,11 +235,16 @@ wstring TextAssembler::
get_plain_wtext() const {
wstring wtext;
TextString::const_iterator si;
for (si = _text_string.begin(); si != _text_string.end(); ++si) {
const TextCharacter &tch = (*si);
for (const TextCharacter &tch : _text_string) {
if (tch._graphic == nullptr) {
wtext += tch._character;
if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else {
wtext.push_back(0);
}
@ -269,11 +274,16 @@ get_wordwrapped_plain_wtext() const {
wtext += '\n';
}
TextString::const_iterator si;
for (si = row._string.begin(); si != row._string.end(); ++si) {
const TextCharacter &tch = (*si);
for (const TextCharacter &tch : row._string) {
if (tch._graphic == nullptr) {
wtext += tch._character;
if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else {
wtext.push_back(0);
}
@ -295,12 +305,17 @@ get_wtext() const {
wstring wtext;
PT(ComputedProperties) current_cprops = _initial_cprops;
TextString::const_iterator si;
for (si = _text_string.begin(); si != _text_string.end(); ++si) {
const TextCharacter &tch = (*si);
for (const TextCharacter &tch : _text_string) {
current_cprops->append_delta(wtext, tch._cprops);
if (tch._graphic == nullptr) {
wtext += tch._character;
if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else {
wtext.push_back(text_embed_graphic_key);
wtext += tch._graphic_wname;
@ -341,12 +356,17 @@ get_wordwrapped_wtext() const {
wtext += '\n';
}
TextString::const_iterator si;
for (si = row._string.begin(); si != row._string.end(); ++si) {
const TextCharacter &tch = (*si);
for (const TextCharacter &tch : row._string) {
current_cprops->append_delta(wtext, tch._cprops);
if (tch._graphic == nullptr) {
wtext += tch._character;
if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
wtext += (wchar_t)tch._character;
} else {
// Use a surrogate pair.
char32_t v = (char32_t)tch._character - 0x10000u;
wtext += (wchar_t)((v >> 10u) | 0xd800u);
wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
}
} else {
wtext.push_back(text_embed_graphic_key);
wtext += tch._graphic_wname;
@ -623,6 +643,18 @@ assemble_text() {
*/
PN_stdfloat TextAssembler::
calc_width(wchar_t character, const TextProperties &properties) {
  // Widen the wchar_t to a char32_t code point and hand off to the char32_t
  // overload, which performs the actual measurement.
  const char32_t code_point = static_cast<char32_t>(character);
  return calc_width(code_point, properties);
}
/**
* Returns the width of a single character, according to its associated font.
* This also correctly calculates the width of cheesy ligatures and accented
* characters, which may not exist in the font as such.
*
* This does not take kerning into account, however.
*/
PN_stdfloat TextAssembler::
calc_width(char32_t character, const TextProperties &properties) {
if (character == ' ') {
// A space is a special case.
TextFont *font = properties.get_font();
@ -846,6 +878,27 @@ scan_wtext(TextAssembler::TextString &output_string,
<< "Unknown TextGraphic: " << graphic_name << "\n";
}
#if WCHAR_MAX < 0x10FFFF
} else if (*si >= 0xd800 && *si < 0xdc00) {
// This is a high surrogate. Look for a subsequent low surrogate.
wchar_t ch = *si;
++si;
if (si == send) {
text_cat.warning()
<< "High surrogate at end of text.\n";
return;
}
wchar_t ch2 = *si;
if (ch2 >= 0xdc00 && ch2 < 0xe000) {
char32_t code_point = 0x10000 + ((ch - 0xd800) << 10) + (ch2 - 0xdc00);
output_string.push_back(TextCharacter(code_point, current_cprops));
++si;
} else {
text_cat.warning()
<< "High surrogate was not followed by low surrogate in text.\n";
}
#endif
} else {
// A normal character. Apply it.
output_string.push_back(TextCharacter(*si, current_cprops));
@ -1422,10 +1475,8 @@ assemble_row(TextAssembler::TextRow &row,
hb_buffer_t *harfbuff = nullptr;
#endif
TextString::const_iterator si;
for (si = row._string.begin(); si != row._string.end(); ++si) {
const TextCharacter &tch = (*si);
wchar_t character = tch._character;
for (const TextCharacter &tch : row._string) {
char32_t character = tch._character;
const TextGraphic *graphic = tch._graphic;
const TextProperties *properties = &(tch._cprops->_properties);

View File

@ -78,14 +78,14 @@ PUBLISHED:
int calc_index(int r, int c) const;
INLINE int get_num_characters() const;
INLINE wchar_t get_character(int n) const;
INLINE char32_t get_character(int n) const;
INLINE const TextGraphic *get_graphic(int n) const;
INLINE const TextProperties &get_properties(int n) const;
INLINE PN_stdfloat get_width(int n) const;
INLINE int get_num_rows() const;
INLINE int get_num_cols(int r) const;
INLINE wchar_t get_character(int r, int c) const;
INLINE char32_t get_character(int r, int c) const;
INLINE const TextGraphic *get_graphic(int r, int c) const;
INLINE const TextProperties &get_properties(int r, int c) const;
INLINE PN_stdfloat get_width(int r, int c) const;
@ -98,6 +98,7 @@ PUBLISHED:
INLINE const LVector2 &get_lr() const;
static PN_stdfloat calc_width(wchar_t character, const TextProperties &properties);
static PN_stdfloat calc_width(char32_t character, const TextProperties &properties);
static PN_stdfloat calc_width(const TextGraphic *graphic, const TextProperties &properties);
static bool has_exact_character(wchar_t character, const TextProperties &properties);
@ -132,13 +133,14 @@ private:
class TextCharacter {
public:
INLINE TextCharacter(wchar_t character, ComputedProperties *cprops);
INLINE TextCharacter(char32_t character, ComputedProperties *cprops);
INLINE TextCharacter(const TextGraphic *graphic,
const std::wstring &graphic_wname,
ComputedProperties *cprops);
INLINE TextCharacter(const TextCharacter &copy);
INLINE void operator = (const TextCharacter &copy);
wchar_t _character;
char32_t _character;
const TextGraphic *_graphic;
std::wstring _graphic_wname;
PT(ComputedProperties) _cprops;