wstring utils

This commit is contained in:
David Rose 2006-03-09 22:36:24 +00:00
parent 34cd6206a2
commit 56b25dafe6
4 changed files with 141 additions and 0 deletions

View File

@ -371,6 +371,26 @@ unicode_isupper(int character) {
return entry->_char_type == UnicodeLatinMap::CT_upper;
}
////////////////////////////////////////////////////////////////////
//     Function: TextEncoder::unicode_isspace
//       Access: Published, Static
//  Description: Returns true if the indicated character is a
//               whitespace character, false otherwise.  This is
//               akin to ctype's isspace(), extended to Unicode.
//
//               The characters accepted are the six that isspace()
//               recognizes in the "C" locale (space, \t, \n, \v,
//               \f, \r), plus the breaking Unicode spaces U+1680,
//               U+2000 through U+2006, U+2008 through U+200A,
//               U+2028, U+2029, U+205F and U+3000.
//
//               The non-breaking spaces (U+00A0, U+2007, U+202F)
//               are deliberately not reported as whitespace, so
//               that code which splits or trims on whitespace does
//               not break text at those characters.
////////////////////////////////////////////////////////////////////
INLINE bool TextEncoder::
unicode_isspace(int character) {
  switch (character) {
  // The ASCII whitespace set, as defined by isspace() in the "C"
  // locale.
  case ' ':
  case '\t':
  case '\n':
  case '\v':
  case '\f':
  case '\r':
  // Isolated Unicode breaking spaces and separators.
  case 0x1680:  // OGHAM SPACE MARK
  case 0x2028:  // LINE SEPARATOR
  case 0x2029:  // PARAGRAPH SEPARATOR
  case 0x205f:  // MEDIUM MATHEMATICAL SPACE
  case 0x3000:  // IDEOGRAPHIC SPACE
    return true;

  default:
    // The general punctuation block of fixed-width spaces, EN QUAD
    // through HAIR SPACE, skipping U+2007 FIGURE SPACE because it is
    // non-breaking.
    return (character >= 0x2000 && character <= 0x200a &&
            character != 0x2007);
  }
}
////////////////////////////////////////////////////////////////////
// Function: TextEncoder::unicode_islower
// Access: Published, Static

View File

@ -79,6 +79,7 @@ PUBLISHED:
// Static character classification and case conversion helpers.
// Each takes a single character code as an int; presumably this is
// a Unicode code point, going by the names -- confirm against the
// definitions.
INLINE static bool unicode_ispunct(int character);
INLINE static bool unicode_islower(int character);
INLINE static bool unicode_isupper(int character);
// Returns true if the character is whitespace; the Unicode analogue
// of ctype's isspace().
INLINE static bool unicode_isspace(int character);
INLINE static int unicode_toupper(int character);
INLINE static int unicode_tolower(int character);

View File

@ -17,6 +17,7 @@
////////////////////////////////////////////////////////////////////
#include "string_utils.h"
#include "textEncoder.h"
#include <ctype.h>
@ -131,6 +132,40 @@ extract_words(const string &str, vector_string &words) {
return num_words;
}
////////////////////////////////////////////////////////////////////
//     Function: extract_words
//  Description: Splits the wide string into whitespace-separated
//               words, as classified by
//               TextEncoder::unicode_isspace().  Each word found is
//               appended to the words vector; the vector is not
//               cleared first, so the caller should empty it
//               beforehand unless the new words are meant to follow
//               whatever it already holds.
//
//               Returns the number of words appended.
////////////////////////////////////////////////////////////////////
int
extract_words(const wstring &str, pvector<wstring> &words) {
  const size_t length = str.length();
  int count = 0;
  size_t cursor = 0;

  for (;;) {
    // Step over the whitespace that precedes the next word (or that
    // trails the final one).
    while (cursor < length && TextEncoder::unicode_isspace(str[cursor])) {
      ++cursor;
    }
    if (cursor >= length) {
      break;
    }

    // The cursor now rests on a non-whitespace character; the word
    // runs from here up to the next whitespace or the end of string.
    const size_t start = cursor;
    do {
      ++cursor;
    } while (cursor < length && !TextEncoder::unicode_isspace(str[cursor]));

    words.push_back(str.substr(start, cursor - start));
    ++count;
  }

  return count;
}
////////////////////////////////////////////////////////////////////
// Function: tokenize
// Description: Chops the source string up into pieces delimited by
@ -158,6 +193,33 @@ tokenize(const string &str, vector_string &words, const string &delimiters) {
words.push_back(string());
}
////////////////////////////////////////////////////////////////////
// Function: tokenize
// Description: Chops the source string up into pieces delimited by
// any of the characters specified in delimiters.
// Repeated delimiter characters represent zero-length
// tokens.
//
// It is the user's responsibility to ensure the output
// vector is cleared before calling this function; the
// results will simply be appended to the end of the
// vector.
////////////////////////////////////////////////////////////////////
void
tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters) {
size_t p = 0;
while (p < str.length()) {
size_t q = str.find_first_of(delimiters, p);
if (q == string::npos) {
words.push_back(str.substr(p));
return;
}
words.push_back(str.substr(p, q - p));
p = q + 1;
}
words.push_back(wstring());
}
////////////////////////////////////////////////////////////////////
// Function: trim_left
// Description: Returns a new string representing the contents of the
@ -173,6 +235,21 @@ trim_left(const string &str) {
return str.substr(begin);
}
////////////////////////////////////////////////////////////////////
//     Function: trim_left
//  Description: Returns a copy of the given wide string with any
//               leading whitespace, as classified by
//               TextEncoder::unicode_isspace(), stripped off.
////////////////////////////////////////////////////////////////////
wstring
trim_left(const wstring &str) {
  // Find the index of the first non-whitespace character; this ends
  // up equal to str.size() if the string is entirely whitespace.
  size_t first = 0;
  for (; first < str.size(); ++first) {
    if (!TextEncoder::unicode_isspace(str[first])) {
      break;
    }
  }

  return str.substr(first);
}
////////////////////////////////////////////////////////////////////
// Function: trim_right
// Description: Returns a new string representing the contents of the
@ -189,6 +266,22 @@ trim_right(const string &str) {
return str.substr(begin, end - begin);
}
////////////////////////////////////////////////////////////////////
//     Function: trim_right
//  Description: Returns a copy of the given wide string with any
//               trailing whitespace, as classified by
//               TextEncoder::unicode_isspace(), stripped off.
////////////////////////////////////////////////////////////////////
wstring
trim_right(const wstring &str) {
  // Shrink the kept length while the last kept character is
  // whitespace; it reaches zero if the string is entirely
  // whitespace.
  size_t keep = str.size();
  while (keep != 0 && TextEncoder::unicode_isspace(str[keep - 1])) {
    --keep;
  }

  return str.substr(0, keep);
}
////////////////////////////////////////////////////////////////////
// Function: trim
// Description: Returns a new string representing the contents of the
@ -210,6 +303,27 @@ trim(const string &str) {
return str.substr(begin, end - begin);
}
////////////////////////////////////////////////////////////////////
//     Function: trim
//  Description: Returns a copy of the given wide string with
//               whitespace, as classified by
//               TextEncoder::unicode_isspace(), stripped from both
//               the front and the back.
////////////////////////////////////////////////////////////////////
wstring
trim(const wstring &str) {
  const size_t length = str.size();

  // Advance past the leading whitespace.
  size_t first = 0;
  for (; first < length; ++first) {
    if (!TextEncoder::unicode_isspace(str[first])) {
      break;
    }
  }

  // Back up over the trailing whitespace, never crossing the point
  // where the leading scan stopped.
  size_t last = length;
  for (; last > first; --last) {
    if (!TextEncoder::unicode_isspace(str[last - 1])) {
      break;
    }
  }

  return str.substr(first, last - first);
}
////////////////////////////////////////////////////////////////////
// Function: string_to_int
// Description: A string-interface wrapper around the C library

View File

@ -39,15 +39,21 @@ EXPCL_PANDA string upcase(const string &s);
// Separates the string into words according to whitespace.  The
// words are appended to the vector, which is not cleared first; the
// return value is the number of words extracted.
EXPCL_PANDA int extract_words(const string &str, vector_string &words);
EXPCL_PANDA int extract_words(const wstring &str, pvector<wstring> &words);
// Separates the string into words according to the indicated
// delimiters.  Repeated delimiters yield zero-length tokens, and the
// tokens are appended to the vector without clearing it first.
EXPCL_PANDA void tokenize(const string &str, vector_string &words,
const string &delimiters);
EXPCL_PANDA void tokenize(const wstring &str, pvector<wstring> &words,
const wstring &delimiters);
// Trims leading and/or trailing whitespace from the string, returning
// the result as a new string.
EXPCL_PANDA string trim_left(const string &str);
EXPCL_PANDA wstring trim_left(const wstring &str);
EXPCL_PANDA string trim_right(const string &str);
EXPCL_PANDA wstring trim_right(const wstring &str);
EXPCL_PANDA string trim(const string &str);
EXPCL_PANDA wstring trim(const wstring &str);
// Functions to parse numeric values out of a string.
EXPCL_PANDA int string_to_int(const string &str, string &tail);