diff --git a/panda/src/express/textEncoder.I b/panda/src/express/textEncoder.I index 30aa8ad847..8251dbb8ab 100644 --- a/panda/src/express/textEncoder.I +++ b/panda/src/express/textEncoder.I @@ -371,6 +371,31 @@ unicode_isupper(int character) { return entry->_char_type == UnicodeLatinMap::CT_upper; } +//////////////////////////////////////////////////////////////////// +// Function: TextEncoder::unicode_isspace +// Access: Published, Static +// Description: Returns true if the indicated character is a +// whitespace character, false otherwise. This matches +// ctype's isspace() in the "C" locale: space, tab, +// newline, vertical tab, form feed, carriage return. +// Other Unicode space characters are not recognized. +//////////////////////////////////////////////////////////////////// +INLINE bool TextEncoder:: +unicode_isspace(int character) { + switch (character) { + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + return true; + + default: + return false; + } +} + //////////////////////////////////////////////////////////////////// // Function: TextEncoder::unicode_islower // Access: Published, Static diff --git a/panda/src/express/textEncoder.h b/panda/src/express/textEncoder.h index 249ef3f162..6765653747 100644 --- a/panda/src/express/textEncoder.h +++ b/panda/src/express/textEncoder.h @@ -79,6 +79,7 @@ PUBLISHED: INLINE static bool unicode_ispunct(int character); INLINE static bool unicode_islower(int character); INLINE static bool unicode_isupper(int character); + INLINE static bool unicode_isspace(int character); INLINE static int unicode_toupper(int character); INLINE static int unicode_tolower(int character); diff --git a/panda/src/putil/string_utils.cxx b/panda/src/putil/string_utils.cxx index fa73534744..03d61da488 100644 --- a/panda/src/putil/string_utils.cxx +++ b/panda/src/putil/string_utils.cxx @@ -17,6 +17,7 @@ //////////////////////////////////////////////////////////////////// #include "string_utils.h" +#include "textEncoder.h" #include @@ -131,6 +132,40 @@ extract_words(const string &str, vector_string &words) { return num_words; } 
+//////////////////////////////////////////////////////////////////// +// Function: extract_words +// Description: Divides the wide string into words separated by +// unicode_isspace() whitespace. The words vector should be cleared by +// the user before calling; otherwise, the list of words +// in the string will be appended to the end of whatever +// was there before. +// +// The return value is the number of words appended by this call. +//////////////////////////////////////////////////////////////////// +int +extract_words(const wstring &str, pvector &words) { + int num_words = 0; + + size_t pos = 0; + while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) { // skip leading whitespace + pos++; + } + while (pos < str.length()) { + size_t word_start = pos; // pos is on a non-whitespace character here + while (pos < str.length() && !TextEncoder::unicode_isspace(str[pos])) { // scan to the end of the word + pos++; + } + words.push_back(str.substr(word_start, pos - word_start)); + num_words++; + + while (pos < str.length() && TextEncoder::unicode_isspace(str[pos])) { // skip the whitespace run after the word + pos++; + } + } + + return num_words; +} + //////////////////////////////////////////////////////////////////// // Function: tokenize // Description: Chops the source string up into pieces delimited by @@ -158,6 +193,33 @@ tokenize(const string &str, vector_string &words, const string &delimiters) { words.push_back(string()); } +//////////////////////////////////////////////////////////////////// +// Function: tokenize +// Description: Chops the wide source string up into pieces delimited by +// any of the characters specified in delimiters. +// Repeated delimiter characters represent zero-length +// tokens. +// +// It is the user's responsibility to ensure the output +// vector is cleared before calling this function; the +// results will simply be appended to the end of the +// vector. 
+//////////////////////////////////////////////////////////////////// +void +tokenize(const wstring &str, pvector &words, const wstring &delimiters) { + size_t p = 0; + while (p < str.length()) { + size_t q = str.find_first_of(delimiters, p); + if (q == wstring::npos) { // no further delimiter: the rest of str is the last token + words.push_back(str.substr(p)); + return; + } + words.push_back(str.substr(p, q - p)); + p = q + 1; // resume just past the delimiter + } + words.push_back(wstring()); // str was empty or ended with a delimiter: emit a final empty token +} + //////////////////////////////////////////////////////////////////// // Function: trim_left // Description: Returns a new string representing the contents of the @@ -173,6 +235,21 @@ trim_left(const string &str) { return str.substr(begin); } +//////////////////////////////////////////////////////////////////// +// Function: trim_left +// Description: Returns a new wide string representing the contents of the +// given string with the leading whitespace removed. +//////////////////////////////////////////////////////////////////// +wstring +trim_left(const wstring &str) { + size_t begin = 0; + while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) { // skip leading whitespace + begin++; + } + + return str.substr(begin); // empty when str is all whitespace +} + //////////////////////////////////////////////////////////////////// // Function: trim_right // Description: Returns a new string representing the contents of the @@ -189,6 +266,22 @@ trim_right(const string &str) { return str.substr(begin, end - begin); } +//////////////////////////////////////////////////////////////////// +// Function: trim_right +// Description: Returns a new wide string representing the contents of the +// given string with the trailing whitespace removed. 
+//////////////////////////////////////////////////////////////////// +wstring +trim_right(const wstring &str) { + size_t begin = 0; + size_t end = str.size(); // one past the last character kept + while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) { // back up over trailing whitespace + end--; + } + + return str.substr(begin, end - begin); +} + //////////////////////////////////////////////////////////////////// // Function: trim // Description: Returns a new string representing the contents of the @@ -210,6 +303,27 @@ trim(const string &str) { return str.substr(begin, end - begin); } +//////////////////////////////////////////////////////////////////// +// Function: trim +// Description: Returns a new wide string representing the contents of the +// given string with both leading and trailing +// whitespace removed. +//////////////////////////////////////////////////////////////////// +wstring +trim(const wstring &str) { + size_t begin = 0; + while (begin < str.size() && TextEncoder::unicode_isspace(str[begin])) { // skip leading whitespace + begin++; + } + + size_t end = str.size(); + while (end > begin && TextEncoder::unicode_isspace(str[end - 1])) { // back up over trailing whitespace, never past begin + end--; + } + + return str.substr(begin, end - begin); +} + //////////////////////////////////////////////////////////////////// // Function: string_to_int // Description: A string-interface wrapper around the C library diff --git a/panda/src/putil/string_utils.h b/panda/src/putil/string_utils.h index ee3468a59c..5b10f9e118 100644 --- a/panda/src/putil/string_utils.h +++ b/panda/src/putil/string_utils.h @@ -39,15 +39,21 @@ EXPCL_PANDA string upcase(const string &s); // Separates the string into words according to whitespace. EXPCL_PANDA int extract_words(const string &str, vector_string &words); +EXPCL_PANDA int extract_words(const wstring &str, pvector &words); // Separates the string into words according to the indicated delimiters. 
EXPCL_PANDA void tokenize(const string &str, vector_string &words, const string &delimiters); +EXPCL_PANDA void tokenize(const wstring &str, pvector &words, + const wstring &delimiters); // Trims leading and/or trailing whitespace from the string. EXPCL_PANDA string trim_left(const string &str); +EXPCL_PANDA wstring trim_left(const wstring &str); EXPCL_PANDA string trim_right(const string &str); +EXPCL_PANDA wstring trim_right(const wstring &str); EXPCL_PANDA string trim(const string &str); +EXPCL_PANDA wstring trim(const wstring &str); // Functions to parse numeric values out of a string. EXPCL_PANDA int string_to_int(const string &str, string &tail);