wstring utils

2025-10-05 03:15:07 -04:00 · 2006-03-09 22:36:24 +00:00 · 2006-03-09 22:36:24 +00:00 · 56b25dafe6
commit 56b25dafe6
parent 34cd6206a2
4 changed files with 141 additions and 0 deletions
--- a/panda/src/express/textEncoder.I
+++ b/panda/src/express/textEncoder.I
@ -371,6 +371,26 @@ unicode_isupper(int character) {
  return entry->_char_type == UnicodeLatinMap::CT_upper;
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: TextEncoder::unicode_isspace
 //       Access: Published, Static
 //  Description: Returns true if the indicated character is a
 //               whitespace character, false otherwise.  This is akin
 //               to ctype's isspace() in the "C" locale: space,
 //               horizontal tab, newline, vertical tab, form feed and
 //               carriage return all count as whitespace.
 //
 //               Only these six ASCII characters are recognized; any
 //               other code point (including non-ASCII space
 //               characters) yields false.
 ////////////////////////////////////////////////////////////////////
 INLINE bool TextEncoder::
 unicode_isspace(int character) {
  switch (character) {
  case ' ':
  case '\t':
  case '\n':
  // The remaining members of isspace()'s "C" locale set.  Without '\r',
  // text with CRLF line endings keeps a stray carriage return after
  // being trimmed or split into words.
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: TextEncoder::unicode_islower
 //       Access: Published, Static
--- a/panda/src/express/textEncoder.h
+++ b/panda/src/express/textEncoder.h
@ -79,6 +79,7 @@ PUBLISHED:
  INLINE static bool unicode_ispunct(int character);
  INLINE static bool unicode_islower(int character);
  INLINE static bool unicode_isupper(int character);
  // Whitespace test for a single character code; defined in
  // textEncoder.I.
  INLINE static bool unicode_isspace(int character);
  INLINE static int unicode_toupper(int character);
  INLINE static int unicode_tolower(int character);
--- a/panda/src/putil/string_utils.cxx
+++ b/panda/src/putil/string_utils.cxx
@ -17,6 +17,7 @@
 ////////////////////////////////////////////////////////////////////
 #include "string_utils.h"
 #include "textEncoder.h"
 #include <ctype.h>
@ -131,6 +132,40 @@ extract_words(const string &str, vector_string &words) {
  return num_words;
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: extract_words
 //  Description: Splits the wide string into whitespace-separated
 //               words, where whitespace is whatever
 //               TextEncoder::unicode_isspace() accepts.  Leading,
 //               trailing and repeated whitespace never produces an
 //               empty word.
 //
 //               Each word is appended to the words vector, which is
 //               not cleared first; the caller should clear it
 //               beforehand if only this string's words are wanted.
 //
 //               The return value is the number of words appended by
 //               this call.
 ////////////////////////////////////////////////////////////////////
 int
 extract_words(const wstring &str, pvector<wstring> &words) {
  const size_t length = str.length();
  int count = 0;
  size_t cursor = 0;

  for (;;) {
    // Step over the whitespace in front of the next word, if any.
    while (cursor < length && TextEncoder::unicode_isspace(str[cursor])) {
      ++cursor;
    }
    if (cursor >= length) {
      break;
    }

    // str[cursor] is known to be non-whitespace here, so the word is at
    // least one character long.
    const size_t first = cursor;
    do {
      ++cursor;
    } while (cursor < length && !TextEncoder::unicode_isspace(str[cursor]));

    words.push_back(str.substr(first, cursor - first));
    ++count;
  }

  return count;
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: tokenize
 //  Description: Chops the source string up into pieces delimited by
@ -158,6 +193,33 @@ tokenize(const string &str, vector_string &words, const string &delimiters) {
  words.push_back(string());
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: tokenize
 //  Description: Chops the source string up into pieces delimited by
 //               any of the characters specified in delimiters.
 //               Repeated delimiter characters represent zero-length
 //               tokens.
 //
 //               It is the user's responsibility to ensure the output
 //               vector is cleared before calling this function; the
 //               results will simply be appended to the end of the
 //               vector.
 ////////////////////////////////////////////////////////////////////
 void
 tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters) {
  size_t p = 0;
  while (p < str.length()) {
    size_t q = str.find_first_of(delimiters, p);
    if (q == string::npos) {
      words.push_back(str.substr(p));
      return;
    }
    words.push_back(str.substr(p, q - p));
    p = q + 1;
  }
  words.push_back(wstring());
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: trim_left
 //  Description: Returns a new string representing the contents of the
@ -173,6 +235,21 @@ trim_left(const string &str) {
  return str.substr(begin);
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: trim_left
 //  Description: Returns a copy of the given wide string with any
 //               leading whitespace, as classified by
 //               TextEncoder::unicode_isspace(), removed.  The input
 //               string itself is not modified.
 ////////////////////////////////////////////////////////////////////
 wstring
 trim_left(const wstring &str) {
  const size_t length = str.size();

  // Advance to the first non-whitespace character (or to the end, if
  // the string is all whitespace).
  size_t first = 0;
  for (; first < length; ++first) {
    if (!TextEncoder::unicode_isspace(str[first])) {
      break;
    }
  }

  return str.substr(first);
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: trim_right
 //  Description: Returns a new string representing the contents of the
@ -189,6 +266,22 @@ trim_right(const string &str) {
  return str.substr(begin, end - begin);
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: trim_right
 //  Description: Returns a copy of the given wide string with any
 //               trailing whitespace, as classified by
 //               TextEncoder::unicode_isspace(), removed.  The input
 //               string itself is not modified.
 ////////////////////////////////////////////////////////////////////
 wstring
 trim_right(const wstring &str) {
  // Number of leading characters to keep; shrinks while the last kept
  // character is whitespace.
  size_t keep = str.size();
  while (keep != 0 && TextEncoder::unicode_isspace(str[keep - 1])) {
    --keep;
  }

  return str.substr(0, keep);
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: trim
 //  Description: Returns a new string representing the contents of the
@ -210,6 +303,27 @@ trim(const string &str) {
  return str.substr(begin, end - begin);
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: trim
 //  Description: Returns a copy of the given wide string with both
 //               leading and trailing whitespace, as classified by
 //               TextEncoder::unicode_isspace(), removed.  The input
 //               string itself is not modified.
 ////////////////////////////////////////////////////////////////////
 wstring
 trim(const wstring &str) {
  const size_t length = str.size();

  // Locate the first character to keep.
  size_t first = 0;
  for (; first < length; ++first) {
    if (!TextEncoder::unicode_isspace(str[first])) {
      break;
    }
  }

  // Locate one past the last character to keep, never backing up
  // beyond the first kept character.
  size_t last = length;
  for (; last > first; --last) {
    if (!TextEncoder::unicode_isspace(str[last - 1])) {
      break;
    }
  }

  return str.substr(first, last - first);
 }
 ////////////////////////////////////////////////////////////////////
 //     Function: string_to_int
 //  Description: A string-interface wrapper around the C library
--- a/panda/src/putil/string_utils.h
+++ b/panda/src/putil/string_utils.h
@ -39,15 +39,21 @@ EXPCL_PANDA string upcase(const string &s);
 // Separates the string into words according to whitespace.  The words
 // are appended to the end of the words vector, which is not cleared
 // first; the return value is the number of words extracted.
 EXPCL_PANDA int extract_words(const string &str, vector_string &words);
 EXPCL_PANDA int extract_words(const wstring &str, pvector<wstring> &words);
 // Separates the string into words according to the indicated delimiters.
 // Any single character in delimiters ends a token, and repeated
 // delimiters represent zero-length tokens.  The tokens are appended to
 // the end of the words vector, which is not cleared first.
 EXPCL_PANDA void tokenize(const string &str, vector_string &words,
                          const string &delimiters);
 EXPCL_PANDA void tokenize(const wstring &str, pvector<wstring> &words,
                          const wstring &delimiters);
 // Trims leading and/or trailing whitespace from the string.  Each
 // function returns a new string and leaves its argument untouched:
 // trim_left() strips the front, trim_right() the back, trim() both.
 EXPCL_PANDA string trim_left(const string &str);
 EXPCL_PANDA wstring trim_left(const wstring &str);
 EXPCL_PANDA string trim_right(const string &str);
 EXPCL_PANDA wstring trim_right(const wstring &str);
 EXPCL_PANDA string trim(const string &str);
 EXPCL_PANDA wstring trim(const wstring &str);
 // Functions to parse numeric values out of a string.
 EXPCL_PANDA int string_to_int(const string &str, string &tail);