From 51f5124048f8f50b3580cec0815cb358de8cd041 Mon Sep 17 00:00:00 2001
From: rdb <git@rdb.name>
Date: Mon, 15 Oct 2018 22:23:45 +0200
Subject: [PATCH] dtoolutil: consistently use char32_t for Unicode code points

Unlike wchar_t (which is only 16 bits wide on Windows), char32_t is guaranteed to be wide enough to hold any Unicode code point.
---
 dtool/src/dtoolutil/textEncoder.I       | 18 +++++++++---------
 dtool/src/dtoolutil/textEncoder.h       | 20 ++++++++++----------
 dtool/src/dtoolutil/unicodeLatinMap.cxx |  2 +-
 dtool/src/dtoolutil/unicodeLatinMap.h   | 10 +++++-----
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/dtool/src/dtoolutil/textEncoder.I b/dtool/src/dtoolutil/textEncoder.I
index 766319d6da..c9eeb0ec66 100644
--- a/dtool/src/dtoolutil/textEncoder.I
+++ b/dtool/src/dtoolutil/textEncoder.I
@@ -220,7 +220,7 @@ get_unicode_char(size_t index) const {
  * according to set_encoding().
  */
 INLINE void TextEncoder::
-set_unicode_char(size_t index, int character) {
+set_unicode_char(size_t index, char32_t character) {
   get_wtext();
   if (index < _wtext.length()) {
     _wtext[index] = character;
@@ -283,7 +283,7 @@ reencode_text(const std::string &text, TextEncoder::Encoding from,
  * otherwise.  This is akin to ctype's isalpha(), extended to Unicode.
  */
 INLINE bool TextEncoder::
-unicode_isalpha(int character) {
+unicode_isalpha(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     return false;
@@ -297,7 +297,7 @@ unicode_isalpha(int character) {
  * otherwise.  This is akin to ctype's isdigit(), extended to Unicode.
  */
 INLINE bool TextEncoder::
-unicode_isdigit(int character) {
+unicode_isdigit(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     // The digits aren't actually listed in the map.
@@ -312,7 +312,7 @@ unicode_isdigit(int character) {
  * otherwise.  This is akin to ctype's ispunct(), extended to Unicode.
  */
 INLINE bool TextEncoder::
-unicode_ispunct(int character) {
+unicode_ispunct(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     // Some punctuation marks aren't listed in the map.
@@ -326,7 +326,7 @@ unicode_ispunct(int character) {
  * otherwise.  This is akin to ctype's isupper(), extended to Unicode.
  */
 INLINE bool TextEncoder::
-unicode_isupper(int character) {
+unicode_isupper(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     return false;
@@ -339,7 +339,7 @@ unicode_isupper(int character) {
  * otherwise.  This is akin to ctype's isspace(), extended to Unicode.
  */
 INLINE bool TextEncoder::
-unicode_isspace(int character) {
+unicode_isspace(char32_t character) {
   switch (character) {
   case ' ':
   case '\t':
@@ -356,7 +356,7 @@ unicode_isspace(int character) {
  * otherwise.  This is akin to ctype's islower(), extended to Unicode.
  */
 INLINE bool TextEncoder::
-unicode_islower(int character) {
+unicode_islower(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     return false;
@@ -369,7 +369,7 @@ unicode_islower(int character) {
  * akin to ctype's toupper(), extended to Unicode.
  */
 INLINE int TextEncoder::
-unicode_toupper(int character) {
+unicode_toupper(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     return character;
@@ -382,7 +382,7 @@ unicode_toupper(int character) {
  * akin to ctype's tolower(), extended to Unicode.
  */
 INLINE int TextEncoder::
-unicode_tolower(int character) {
+unicode_tolower(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
     return character;
diff --git a/dtool/src/dtoolutil/textEncoder.h b/dtool/src/dtoolutil/textEncoder.h
index 30004ef5d3..1e1d9eeda4 100644
--- a/dtool/src/dtoolutil/textEncoder.h
+++ b/dtool/src/dtoolutil/textEncoder.h
@@ -23,7 +23,7 @@ class StringDecoder;
 
 /**
  * This class can be used to convert text between multiple representations,
- * e.g.  utf-8 to Unicode.  You may use it as a static class object, passing
+ * e.g.  UTF-8 to UTF-16.  You may use it as a static class object, passing
  * the encoding each time, or you may create an instance and use that object,
  * which will record the current encoding and retain the current string.
  *
@@ -78,21 +78,21 @@ PUBLISHED:
   INLINE void append_unicode_char(char32_t character);
   INLINE size_t get_num_chars() const;
   INLINE int get_unicode_char(size_t index) const;
-  INLINE void set_unicode_char(size_t index, int character);
+  INLINE void set_unicode_char(size_t index, char32_t character);
   INLINE std::string get_encoded_char(size_t index) const;
   INLINE std::string get_encoded_char(size_t index, Encoding encoding) const;
   INLINE std::string get_text_as_ascii() const;
 
   INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to);
 
-  INLINE static bool unicode_isalpha(int character);
-  INLINE static bool unicode_isdigit(int character);
-  INLINE static bool unicode_ispunct(int character);
-  INLINE static bool unicode_islower(int character);
-  INLINE static bool unicode_isupper(int character);
-  INLINE static bool unicode_isspace(int character);
-  INLINE static int unicode_toupper(int character);
-  INLINE static int unicode_tolower(int character);
+  INLINE static bool unicode_isalpha(char32_t character);
+  INLINE static bool unicode_isdigit(char32_t character);
+  INLINE static bool unicode_ispunct(char32_t character);
+  INLINE static bool unicode_islower(char32_t character);
+  INLINE static bool unicode_isupper(char32_t character);
+  INLINE static bool unicode_isspace(char32_t character);
+  INLINE static int unicode_toupper(char32_t character);
+  INLINE static int unicode_tolower(char32_t character);
 
   INLINE static std::string upper(const std::string &source);
   INLINE static std::string upper(const std::string &source, Encoding encoding);
diff --git a/dtool/src/dtoolutil/unicodeLatinMap.cxx b/dtool/src/dtoolutil/unicodeLatinMap.cxx
index 87c9cb5a7d..288b85a4ce 100644
--- a/dtool/src/dtoolutil/unicodeLatinMap.cxx
+++ b/dtool/src/dtoolutil/unicodeLatinMap.cxx
@@ -1378,7 +1378,7 @@ static const wchar_t combining_accent_map[] = {
  * Returns the Entry associated with the indicated character, if there is one.
  */
 const UnicodeLatinMap::Entry *UnicodeLatinMap::
-look_up(wchar_t character) {
+look_up(char32_t character) {
   if (!_initialized) {
     init();
   }
diff --git a/dtool/src/dtoolutil/unicodeLatinMap.h b/dtool/src/dtoolutil/unicodeLatinMap.h
index fb94154f7f..6ed3c5f17c 100644
--- a/dtool/src/dtoolutil/unicodeLatinMap.h
+++ b/dtool/src/dtoolutil/unicodeLatinMap.h
@@ -112,17 +112,17 @@ public:
 
   class Entry {
   public:
-    wchar_t _character;
+    char32_t _character;
     CharType _char_type;
     char _ascii_equiv;
     char _ascii_additional;
-    wchar_t _tolower_character;
-    wchar_t _toupper_character;
+    char32_t _tolower_character;
+    char32_t _toupper_character;
     AccentType _accent_type;
     int _additional_flags;
   };
 
-  static const Entry *look_up(wchar_t character);
+  static const Entry *look_up(char32_t character);
 
   static wchar_t get_combining_accent(AccentType accent);
 
@@ -130,7 +130,7 @@ private:
   static void init();
   static bool _initialized;
 
-  typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter;
+  typedef phash_map<char32_t, const Entry *, integer_hash<char32_t> > ByCharacter;
   static ByCharacter *_by_character;
   enum { max_direct_chars = 256 };
   static const Entry *_direct_chars[max_direct_chars];