dtoolutil: consistently use char32_t for Unicode code points

Unlike wchar_t, char32_t is guaranteed to be able to hold a UTF-32 character.
2025-10-01 01:07:51 -04:00 · 2018-10-15 22:23:45 +02:00 · 2018-10-15 22:23:45 +02:00 · 51f5124048
commit 51f5124048
parent ebfb3702ac
4 changed files with 25 additions and 25 deletions
--- a/dtool/src/dtoolutil/textEncoder.I
+++ b/dtool/src/dtoolutil/textEncoder.I
@ -220,7 +220,7 @@ get_unicode_char(size_t index) const {
 * according to set_encoding().
 */
 INLINE void TextEncoder::
-set_unicode_char(size_t index, int character) {
+set_unicode_char(size_t index, char32_t character) {
  get_wtext();
  if (index < _wtext.length()) {
    _wtext[index] = character;
@ -283,7 +283,7 @@ reencode_text(const std::string &text, TextEncoder::Encoding from,
 * otherwise.  This is akin to ctype's isalpha(), extended to Unicode.
 */
 INLINE bool TextEncoder::
-unicode_isalpha(int character) {
+unicode_isalpha(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    return false;
@ -297,7 +297,7 @@ unicode_isalpha(int character) {
 * otherwise.  This is akin to ctype's isdigit(), extended to Unicode.
 */
 INLINE bool TextEncoder::
-unicode_isdigit(int character) {
+unicode_isdigit(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    // The digits aren't actually listed in the map.
@ -312,7 +312,7 @@ unicode_isdigit(int character) {
 * otherwise.  This is akin to ctype's ispunct(), extended to Unicode.
 */
 INLINE bool TextEncoder::
-unicode_ispunct(int character) {
+unicode_ispunct(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    // Some punctuation marks aren't listed in the map.
@ -326,7 +326,7 @@ unicode_ispunct(int character) {
 * otherwise.  This is akin to ctype's isupper(), extended to Unicode.
 */
 INLINE bool TextEncoder::
-unicode_isupper(int character) {
+unicode_isupper(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    return false;
@ -339,7 +339,7 @@ unicode_isupper(int character) {
 * otherwise.  This is akin to ctype's isspace(), extended to Unicode.
 */
 INLINE bool TextEncoder::
-unicode_isspace(int character) {
+unicode_isspace(char32_t character) {
  switch (character) {
  case ' ':
  case '\t':
@ -356,7 +356,7 @@ unicode_isspace(int character) {
 * otherwise.  This is akin to ctype's islower(), extended to Unicode.
 */
 INLINE bool TextEncoder::
-unicode_islower(int character) {
+unicode_islower(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    return false;
@ -369,7 +369,7 @@ unicode_islower(int character) {
 * akin to ctype's toupper(), extended to Unicode.
 */
 INLINE int TextEncoder::
-unicode_toupper(int character) {
+unicode_toupper(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    return character;
@ -382,7 +382,7 @@ unicode_toupper(int character) {
 * akin to ctype's tolower(), extended to Unicode.
 */
 INLINE int TextEncoder::
-unicode_tolower(int character) {
+unicode_tolower(char32_t character) {
  const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
  if (entry == nullptr) {
    return character;
--- a/dtool/src/dtoolutil/textEncoder.h
+++ b/dtool/src/dtoolutil/textEncoder.h
@ -23,7 +23,7 @@ class StringDecoder;

 /**
 * This class can be used to convert text between multiple representations,
- * e.g.  utf-8 to Unicode.  You may use it as a static class object, passing
+ * e.g.  UTF-8 to UTF-16.  You may use it as a static class object, passing
 * the encoding each time, or you may create an instance and use that object,
 * which will record the current encoding and retain the current string.
 *
@ -78,21 +78,21 @@ PUBLISHED:
  INLINE void append_unicode_char(char32_t character);
  INLINE size_t get_num_chars() const;
  INLINE int get_unicode_char(size_t index) const;
-  INLINE void set_unicode_char(size_t index, int character);
+  INLINE void set_unicode_char(size_t index, char32_t character);
  INLINE std::string get_encoded_char(size_t index) const;
  INLINE std::string get_encoded_char(size_t index, Encoding encoding) const;
  INLINE std::string get_text_as_ascii() const;

  INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to);

-  INLINE static bool unicode_isalpha(int character);
-  INLINE static bool unicode_isdigit(int character);
-  INLINE static bool unicode_ispunct(int character);
-  INLINE static bool unicode_islower(int character);
-  INLINE static bool unicode_isupper(int character);
-  INLINE static bool unicode_isspace(int character);
-  INLINE static int unicode_toupper(int character);
-  INLINE static int unicode_tolower(int character);
+  INLINE static bool unicode_isalpha(char32_t character);
+  INLINE static bool unicode_isdigit(char32_t character);
+  INLINE static bool unicode_ispunct(char32_t character);
+  INLINE static bool unicode_islower(char32_t character);
+  INLINE static bool unicode_isupper(char32_t character);
+  INLINE static bool unicode_isspace(char32_t character);
+  INLINE static int unicode_toupper(char32_t character);
+  INLINE static int unicode_tolower(char32_t character);

  INLINE static std::string upper(const std::string &source);
  INLINE static std::string upper(const std::string &source, Encoding encoding);
--- a/dtool/src/dtoolutil/unicodeLatinMap.cxx
+++ b/dtool/src/dtoolutil/unicodeLatinMap.cxx
@ -1378,7 +1378,7 @@ static const wchar_t combining_accent_map[] = {
 * Returns the Entry associated with the indicated character, if there is one.
 */
 const UnicodeLatinMap::Entry *UnicodeLatinMap::
-look_up(wchar_t character) {
+look_up(char32_t character) {
  if (!_initialized) {
    init();
  }
--- a/dtool/src/dtoolutil/unicodeLatinMap.h
+++ b/dtool/src/dtoolutil/unicodeLatinMap.h
@ -112,17 +112,17 @@ public:

  class Entry {
  public:
-    wchar_t _character;
+    char32_t _character;
    CharType _char_type;
    char _ascii_equiv;
    char _ascii_additional;
-    wchar_t _tolower_character;
-    wchar_t _toupper_character;
+    char32_t _tolower_character;
+    char32_t _toupper_character;
    AccentType _accent_type;
    int _additional_flags;
  };

-  static const Entry *look_up(wchar_t character);
+  static const Entry *look_up(char32_t character);

  static wchar_t get_combining_accent(AccentType accent);

@ -130,7 +130,7 @@ private:
  static void init();
  static bool _initialized;

-  typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter;
+  typedef phash_map<char32_t, const Entry *, integer_hash<char32_t> > ByCharacter;
  static ByCharacter *_by_character;
  enum { max_direct_chars = 256 };
  static const Entry *_direct_chars[max_direct_chars];