dtoolutil: consistently use char32_t for Unicode code points

Unlike wchar_t, whose width is platform-dependent (it is only 16 bits on Windows), char32_t is guaranteed to be wide enough to hold any Unicode code point.
This commit is contained in:
rdb 2018-10-15 22:23:45 +02:00
parent ebfb3702ac
commit 51f5124048
4 changed files with 25 additions and 25 deletions

View File

@ -220,7 +220,7 @@ get_unicode_char(size_t index) const {
* according to set_encoding(). * according to set_encoding().
*/ */
INLINE void TextEncoder:: INLINE void TextEncoder::
set_unicode_char(size_t index, int character) { set_unicode_char(size_t index, char32_t character) {
get_wtext(); get_wtext();
if (index < _wtext.length()) { if (index < _wtext.length()) {
_wtext[index] = character; _wtext[index] = character;
@ -283,7 +283,7 @@ reencode_text(const std::string &text, TextEncoder::Encoding from,
* otherwise. This is akin to ctype's isalpha(), extended to Unicode. * otherwise. This is akin to ctype's isalpha(), extended to Unicode.
*/ */
INLINE bool TextEncoder:: INLINE bool TextEncoder::
unicode_isalpha(int character) { unicode_isalpha(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
return false; return false;
@ -297,7 +297,7 @@ unicode_isalpha(int character) {
* otherwise. This is akin to ctype's isdigit(), extended to Unicode. * otherwise. This is akin to ctype's isdigit(), extended to Unicode.
*/ */
INLINE bool TextEncoder:: INLINE bool TextEncoder::
unicode_isdigit(int character) { unicode_isdigit(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
// The digits aren't actually listed in the map. // The digits aren't actually listed in the map.
@ -312,7 +312,7 @@ unicode_isdigit(int character) {
* otherwise. This is akin to ctype's ispunct(), extended to Unicode. * otherwise. This is akin to ctype's ispunct(), extended to Unicode.
*/ */
INLINE bool TextEncoder:: INLINE bool TextEncoder::
unicode_ispunct(int character) { unicode_ispunct(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
// Some punctuation marks aren't listed in the map. // Some punctuation marks aren't listed in the map.
@ -326,7 +326,7 @@ unicode_ispunct(int character) {
* otherwise. This is akin to ctype's isupper(), extended to Unicode. * otherwise. This is akin to ctype's isupper(), extended to Unicode.
*/ */
INLINE bool TextEncoder:: INLINE bool TextEncoder::
unicode_isupper(int character) { unicode_isupper(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
return false; return false;
@ -339,7 +339,7 @@ unicode_isupper(int character) {
* otherwise. This is akin to ctype's isspace(), extended to Unicode. * otherwise. This is akin to ctype's isspace(), extended to Unicode.
*/ */
INLINE bool TextEncoder:: INLINE bool TextEncoder::
unicode_isspace(int character) { unicode_isspace(char32_t character) {
switch (character) { switch (character) {
case ' ': case ' ':
case '\t': case '\t':
@ -356,7 +356,7 @@ unicode_isspace(int character) {
* otherwise. This is akin to ctype's islower(), extended to Unicode. * otherwise. This is akin to ctype's islower(), extended to Unicode.
*/ */
INLINE bool TextEncoder:: INLINE bool TextEncoder::
unicode_islower(int character) { unicode_islower(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
return false; return false;
@ -369,7 +369,7 @@ unicode_islower(int character) {
* akin to ctype's toupper(), extended to Unicode. * akin to ctype's toupper(), extended to Unicode.
*/ */
INLINE int TextEncoder:: INLINE int TextEncoder::
unicode_toupper(int character) { unicode_toupper(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
return character; return character;
@ -382,7 +382,7 @@ unicode_toupper(int character) {
* akin to ctype's tolower(), extended to Unicode. * akin to ctype's tolower(), extended to Unicode.
*/ */
INLINE int TextEncoder:: INLINE int TextEncoder::
unicode_tolower(int character) { unicode_tolower(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character); const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) { if (entry == nullptr) {
return character; return character;

View File

@ -23,7 +23,7 @@ class StringDecoder;
/** /**
* This class can be used to convert text between multiple representations, * This class can be used to convert text between multiple representations,
* e.g. utf-8 to Unicode. You may use it as a static class object, passing * e.g. UTF-8 to UTF-16. You may use it as a static class object, passing
* the encoding each time, or you may create an instance and use that object, * the encoding each time, or you may create an instance and use that object,
* which will record the current encoding and retain the current string. * which will record the current encoding and retain the current string.
* *
@ -78,21 +78,21 @@ PUBLISHED:
INLINE void append_unicode_char(char32_t character); INLINE void append_unicode_char(char32_t character);
INLINE size_t get_num_chars() const; INLINE size_t get_num_chars() const;
INLINE int get_unicode_char(size_t index) const; INLINE int get_unicode_char(size_t index) const;
INLINE void set_unicode_char(size_t index, int character); INLINE void set_unicode_char(size_t index, char32_t character);
INLINE std::string get_encoded_char(size_t index) const; INLINE std::string get_encoded_char(size_t index) const;
INLINE std::string get_encoded_char(size_t index, Encoding encoding) const; INLINE std::string get_encoded_char(size_t index, Encoding encoding) const;
INLINE std::string get_text_as_ascii() const; INLINE std::string get_text_as_ascii() const;
INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to); INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to);
INLINE static bool unicode_isalpha(int character); INLINE static bool unicode_isalpha(char32_t character);
INLINE static bool unicode_isdigit(int character); INLINE static bool unicode_isdigit(char32_t character);
INLINE static bool unicode_ispunct(int character); INLINE static bool unicode_ispunct(char32_t character);
INLINE static bool unicode_islower(int character); INLINE static bool unicode_islower(char32_t character);
INLINE static bool unicode_isupper(int character); INLINE static bool unicode_isupper(char32_t character);
INLINE static bool unicode_isspace(int character); INLINE static bool unicode_isspace(char32_t character);
INLINE static int unicode_toupper(int character); INLINE static int unicode_toupper(char32_t character);
INLINE static int unicode_tolower(int character); INLINE static int unicode_tolower(char32_t character);
INLINE static std::string upper(const std::string &source); INLINE static std::string upper(const std::string &source);
INLINE static std::string upper(const std::string &source, Encoding encoding); INLINE static std::string upper(const std::string &source, Encoding encoding);

View File

@ -1378,7 +1378,7 @@ static const wchar_t combining_accent_map[] = {
* Returns the Entry associated with the indicated character, if there is one. * Returns the Entry associated with the indicated character, if there is one.
*/ */
const UnicodeLatinMap::Entry *UnicodeLatinMap:: const UnicodeLatinMap::Entry *UnicodeLatinMap::
look_up(wchar_t character) { look_up(char32_t character) {
if (!_initialized) { if (!_initialized) {
init(); init();
} }

View File

@ -112,17 +112,17 @@ public:
class Entry { class Entry {
public: public:
wchar_t _character; char32_t _character;
CharType _char_type; CharType _char_type;
char _ascii_equiv; char _ascii_equiv;
char _ascii_additional; char _ascii_additional;
wchar_t _tolower_character; char32_t _tolower_character;
wchar_t _toupper_character; char32_t _toupper_character;
AccentType _accent_type; AccentType _accent_type;
int _additional_flags; int _additional_flags;
}; };
static const Entry *look_up(wchar_t character); static const Entry *look_up(char32_t character);
static wchar_t get_combining_accent(AccentType accent); static wchar_t get_combining_accent(AccentType accent);
@ -130,7 +130,7 @@ private:
static void init(); static void init();
static bool _initialized; static bool _initialized;
typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter; typedef phash_map<char32_t, const Entry *, integer_hash<char32_t> > ByCharacter;
static ByCharacter *_by_character; static ByCharacter *_by_character;
enum { max_direct_chars = 256 }; enum { max_direct_chars = 256 };
static const Entry *_direct_chars[max_direct_chars]; static const Entry *_direct_chars[max_direct_chars];