From 51f5124048f8f50b3580cec0815cb358de8cd041 Mon Sep 17 00:00:00 2001 From: rdb Date: Mon, 15 Oct 2018 22:23:45 +0200 Subject: [PATCH] dtoolutil: consistently use char32_t for Unicode code points Unlike wchar_t, char32_t is guaranteed to be able to hold a UTF-32 character. --- dtool/src/dtoolutil/textEncoder.I | 18 +++++++++--------- dtool/src/dtoolutil/textEncoder.h | 20 ++++++++++---------- dtool/src/dtoolutil/unicodeLatinMap.cxx | 2 +- dtool/src/dtoolutil/unicodeLatinMap.h | 10 +++++----- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/dtool/src/dtoolutil/textEncoder.I b/dtool/src/dtoolutil/textEncoder.I index 766319d6da..c9eeb0ec66 100644 --- a/dtool/src/dtoolutil/textEncoder.I +++ b/dtool/src/dtoolutil/textEncoder.I @@ -220,7 +220,7 @@ get_unicode_char(size_t index) const { * according to set_encoding(). */ INLINE void TextEncoder:: -set_unicode_char(size_t index, int character) { +set_unicode_char(size_t index, char32_t character) { get_wtext(); if (index < _wtext.length()) { _wtext[index] = character; @@ -283,7 +283,7 @@ reencode_text(const std::string &text, TextEncoder::Encoding from, * otherwise. This is akin to ctype's isalpha(), extended to Unicode. */ INLINE bool TextEncoder:: -unicode_isalpha(int character) { +unicode_isalpha(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { return false; @@ -297,7 +297,7 @@ unicode_isalpha(int character) { * otherwise. This is akin to ctype's isdigit(), extended to Unicode. */ INLINE bool TextEncoder:: -unicode_isdigit(int character) { +unicode_isdigit(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { // The digits aren't actually listed in the map. @@ -312,7 +312,7 @@ unicode_isdigit(int character) { * otherwise. This is akin to ctype's ispunct(), extended to Unicode. 
*/ INLINE bool TextEncoder:: -unicode_ispunct(int character) { +unicode_ispunct(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { // Some punctuation marks aren't listed in the map. @@ -326,7 +326,7 @@ unicode_ispunct(int character) { * otherwise. This is akin to ctype's isupper(), extended to Unicode. */ INLINE bool TextEncoder:: -unicode_isupper(int character) { +unicode_isupper(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { return false; @@ -339,7 +339,7 @@ unicode_isupper(int character) { * otherwise. This is akin to ctype's isspace(), extended to Unicode. */ INLINE bool TextEncoder:: -unicode_isspace(int character) { +unicode_isspace(char32_t character) { switch (character) { case ' ': case '\t': @@ -356,7 +356,7 @@ unicode_isspace(int character) { * otherwise. This is akin to ctype's islower(), extended to Unicode. */ INLINE bool TextEncoder:: -unicode_islower(int character) { +unicode_islower(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { return false; @@ -369,7 +369,7 @@ unicode_islower(int character) { * akin to ctype's toupper(), extended to Unicode. */ INLINE int TextEncoder:: -unicode_toupper(int character) { +unicode_toupper(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { return character; @@ -382,7 +382,7 @@ unicode_toupper(int character) { * akin to ctype's tolower(), extended to Unicode. 
*/ INLINE int TextEncoder:: -unicode_tolower(int character) { +unicode_tolower(char32_t character) { const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); if (entry == nullptr) { return character; diff --git a/dtool/src/dtoolutil/textEncoder.h b/dtool/src/dtoolutil/textEncoder.h index 30004ef5d3..1e1d9eeda4 100644 --- a/dtool/src/dtoolutil/textEncoder.h +++ b/dtool/src/dtoolutil/textEncoder.h @@ -23,7 +23,7 @@ class StringDecoder; /** * This class can be used to convert text between multiple representations, - * e.g. utf-8 to Unicode. You may use it as a static class object, passing + * e.g. UTF-8 to UTF-16. You may use it as a static class object, passing * the encoding each time, or you may create an instance and use that object, * which will record the current encoding and retain the current string. * @@ -78,21 +78,21 @@ PUBLISHED: INLINE void append_unicode_char(char32_t character); INLINE size_t get_num_chars() const; INLINE int get_unicode_char(size_t index) const; - INLINE void set_unicode_char(size_t index, int character); + INLINE void set_unicode_char(size_t index, char32_t character); INLINE std::string get_encoded_char(size_t index) const; INLINE std::string get_encoded_char(size_t index, Encoding encoding) const; INLINE std::string get_text_as_ascii() const; INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to); - INLINE static bool unicode_isalpha(int character); - INLINE static bool unicode_isdigit(int character); - INLINE static bool unicode_ispunct(int character); - INLINE static bool unicode_islower(int character); - INLINE static bool unicode_isupper(int character); - INLINE static bool unicode_isspace(int character); - INLINE static int unicode_toupper(int character); - INLINE static int unicode_tolower(int character); + INLINE static bool unicode_isalpha(char32_t character); + INLINE static bool unicode_isdigit(char32_t character); + INLINE static bool unicode_ispunct(char32_t 
character); + INLINE static bool unicode_islower(char32_t character); + INLINE static bool unicode_isupper(char32_t character); + INLINE static bool unicode_isspace(char32_t character); + INLINE static int unicode_toupper(char32_t character); + INLINE static int unicode_tolower(char32_t character); INLINE static std::string upper(const std::string &source); INLINE static std::string upper(const std::string &source, Encoding encoding); diff --git a/dtool/src/dtoolutil/unicodeLatinMap.cxx b/dtool/src/dtoolutil/unicodeLatinMap.cxx index 87c9cb5a7d..288b85a4ce 100644 --- a/dtool/src/dtoolutil/unicodeLatinMap.cxx +++ b/dtool/src/dtoolutil/unicodeLatinMap.cxx @@ -1378,7 +1378,7 @@ static const wchar_t combining_accent_map[] = { * Returns the Entry associated with the indicated character, if there is one. */ const UnicodeLatinMap::Entry *UnicodeLatinMap:: -look_up(wchar_t character) { +look_up(char32_t character) { if (!_initialized) { init(); } diff --git a/dtool/src/dtoolutil/unicodeLatinMap.h b/dtool/src/dtoolutil/unicodeLatinMap.h index fb94154f7f..6ed3c5f17c 100644 --- a/dtool/src/dtoolutil/unicodeLatinMap.h +++ b/dtool/src/dtoolutil/unicodeLatinMap.h @@ -112,17 +112,17 @@ public: class Entry { public: - wchar_t _character; + char32_t _character; CharType _char_type; char _ascii_equiv; char _ascii_additional; - wchar_t _tolower_character; - wchar_t _toupper_character; + char32_t _tolower_character; + char32_t _toupper_character; AccentType _accent_type; int _additional_flags; }; - static const Entry *look_up(wchar_t character); + static const Entry *look_up(char32_t character); static wchar_t get_combining_accent(AccentType accent); @@ -130,7 +130,7 @@ private: static void init(); static bool _initialized; - typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter; + typedef phash_map<char32_t, const Entry *, integer_hash<char32_t> > ByCharacter; static ByCharacter *_by_character; enum { max_direct_chars = 256 }; static const Entry *_direct_chars[max_direct_chars];