dtoolutil: consistently use char32_t for Unicode code points

Unlike wchar_t, which is only 16 bits wide on some platforms (e.g. Windows), char32_t is guaranteed to be wide enough to hold any Unicode code point.
This commit is contained in:
rdb 2018-10-15 22:23:45 +02:00
parent ebfb3702ac
commit 51f5124048
4 changed files with 25 additions and 25 deletions

View File

@ -220,7 +220,7 @@ get_unicode_char(size_t index) const {
* according to set_encoding().
*/
INLINE void TextEncoder::
set_unicode_char(size_t index, int character) {
set_unicode_char(size_t index, char32_t character) {
get_wtext();
if (index < _wtext.length()) {
_wtext[index] = character;
@ -283,7 +283,7 @@ reencode_text(const std::string &text, TextEncoder::Encoding from,
* otherwise. This is akin to ctype's isalpha(), extended to Unicode.
*/
INLINE bool TextEncoder::
unicode_isalpha(int character) {
unicode_isalpha(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
return false;
@ -297,7 +297,7 @@ unicode_isalpha(int character) {
* otherwise. This is akin to ctype's isdigit(), extended to Unicode.
*/
INLINE bool TextEncoder::
unicode_isdigit(int character) {
unicode_isdigit(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
// The digits aren't actually listed in the map.
@ -312,7 +312,7 @@ unicode_isdigit(int character) {
* otherwise. This is akin to ctype's ispunct(), extended to Unicode.
*/
INLINE bool TextEncoder::
unicode_ispunct(int character) {
unicode_ispunct(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
// Some punctuation marks aren't listed in the map.
@ -326,7 +326,7 @@ unicode_ispunct(int character) {
* otherwise. This is akin to ctype's isupper(), extended to Unicode.
*/
INLINE bool TextEncoder::
unicode_isupper(int character) {
unicode_isupper(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
return false;
@ -339,7 +339,7 @@ unicode_isupper(int character) {
* otherwise. This is akin to ctype's isspace(), extended to Unicode.
*/
INLINE bool TextEncoder::
unicode_isspace(int character) {
unicode_isspace(char32_t character) {
switch (character) {
case ' ':
case '\t':
@ -356,7 +356,7 @@ unicode_isspace(int character) {
* otherwise. This is akin to ctype's islower(), extended to Unicode.
*/
INLINE bool TextEncoder::
unicode_islower(int character) {
unicode_islower(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
return false;
@ -369,7 +369,7 @@ unicode_islower(int character) {
* akin to ctype's toupper(), extended to Unicode.
*/
INLINE int TextEncoder::
unicode_toupper(int character) {
unicode_toupper(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
return character;
@ -382,7 +382,7 @@ unicode_toupper(int character) {
* akin to ctype's tolower(), extended to Unicode.
*/
INLINE int TextEncoder::
unicode_tolower(int character) {
unicode_tolower(char32_t character) {
const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
if (entry == nullptr) {
return character;

View File

@ -23,7 +23,7 @@ class StringDecoder;
/**
* This class can be used to convert text between multiple representations,
* e.g. utf-8 to Unicode. You may use it as a static class object, passing
* e.g. UTF-8 to UTF-16. You may use it as a static class object, passing
* the encoding each time, or you may create an instance and use that object,
* which will record the current encoding and retain the current string.
*
@ -78,21 +78,21 @@ PUBLISHED:
INLINE void append_unicode_char(char32_t character);
INLINE size_t get_num_chars() const;
INLINE int get_unicode_char(size_t index) const;
INLINE void set_unicode_char(size_t index, int character);
INLINE void set_unicode_char(size_t index, char32_t character);
INLINE std::string get_encoded_char(size_t index) const;
INLINE std::string get_encoded_char(size_t index, Encoding encoding) const;
INLINE std::string get_text_as_ascii() const;
INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to);
INLINE static bool unicode_isalpha(int character);
INLINE static bool unicode_isdigit(int character);
INLINE static bool unicode_ispunct(int character);
INLINE static bool unicode_islower(int character);
INLINE static bool unicode_isupper(int character);
INLINE static bool unicode_isspace(int character);
INLINE static int unicode_toupper(int character);
INLINE static int unicode_tolower(int character);
INLINE static bool unicode_isalpha(char32_t character);
INLINE static bool unicode_isdigit(char32_t character);
INLINE static bool unicode_ispunct(char32_t character);
INLINE static bool unicode_islower(char32_t character);
INLINE static bool unicode_isupper(char32_t character);
INLINE static bool unicode_isspace(char32_t character);
INLINE static int unicode_toupper(char32_t character);
INLINE static int unicode_tolower(char32_t character);
INLINE static std::string upper(const std::string &source);
INLINE static std::string upper(const std::string &source, Encoding encoding);

View File

@ -1378,7 +1378,7 @@ static const wchar_t combining_accent_map[] = {
* Returns the Entry associated with the indicated character, if there is one.
*/
const UnicodeLatinMap::Entry *UnicodeLatinMap::
look_up(wchar_t character) {
look_up(char32_t character) {
if (!_initialized) {
init();
}

View File

@ -112,17 +112,17 @@ public:
class Entry {
public:
wchar_t _character;
char32_t _character;
CharType _char_type;
char _ascii_equiv;
char _ascii_additional;
wchar_t _tolower_character;
wchar_t _toupper_character;
char32_t _tolower_character;
char32_t _toupper_character;
AccentType _accent_type;
int _additional_flags;
};
static const Entry *look_up(wchar_t character);
static const Entry *look_up(char32_t character);
static wchar_t get_combining_accent(AccentType accent);
@ -130,7 +130,7 @@ private:
static void init();
static bool _initialized;
typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter;
typedef phash_map<char32_t, const Entry *, integer_hash<char32_t> > ByCharacter;
static ByCharacter *_by_character;
enum { max_direct_chars = 256 };
static const Entry *_direct_chars[max_direct_chars];