preparing for unicode filenames

This commit is contained in:
David Rose 2011-08-29 04:33:00 +00:00
parent 9a967e9edb
commit 6ecf2fed29
20 changed files with 234 additions and 67 deletions

View File

@ -21,6 +21,9 @@
pandaSystem.h pandaVersion.h \
panda_getopt.h panda_getopt_long.h panda_getopt_impl.h \
pfstream.h pfstream.I pfstreamBuf.h \
stringDecoder.h stringDecoder.I \
textEncoder.h textEncoder.I \
unicodeLatinMap.h \
vector_string.h \
vector_src.h
@ -33,6 +36,9 @@
pandaSystem.cxx \
panda_getopt_impl.cxx \
pfstreamBuf.cxx pfstream.cxx \
stringDecoder.cxx \
textEncoder.cxx \
unicodeLatinMap.cxx \
vector_string.cxx
#define INSTALL_HEADERS \
@ -45,6 +51,9 @@
pandaSystem.h pandaVersion.h \
panda_getopt.h panda_getopt_long.h panda_getopt_impl.h \
pfstream.h pfstream.I pfstreamBuf.h \
stringDecoder.h stringDecoder.I \
textEncoder.h textEncoder.I \
unicodeLatinMap.h \
vector_string.h \
vector_src.cxx vector_src.h
#end lib_target

View File

@ -2,7 +2,10 @@
#include "pandaFileStreamBuf.cxx"
#include "panda_getopt_impl.cxx"
#include "executionEnvironment.cxx"
#include "vector_string.cxx"
#include "pfstream.cxx"
#include "pfstreamBuf.cxx"
#include "stringDecoder.cxx"
#include "textEncoder.cxx"
#include "unicodeLatinMap.cxx"
#include "vector_string.cxx"

View File

@ -23,6 +23,17 @@ Filename(const string &filename) {
(*this) = filename;
}
////////////////////////////////////////////////////////////////////
// Function: Filename::Constructor
// Access: Published
// Description: Constructs a Filename from a wide-character string.
// The flags are cleared and the actual work is handed
// off to the wstring assignment operator.
////////////////////////////////////////////////////////////////////
INLINE Filename::
Filename(const wstring &filename) {
  _flags = 0;
  // Delegate to operator = (const wstring &), which performs the
  // wide-to-encoded conversion.
  operator = (filename);
}
////////////////////////////////////////////////////////////////////
// Function: Filename::Constructor
// Access: Published
@ -139,6 +150,19 @@ operator = (const string &filename) {
return *this;
}
////////////////////////////////////////////////////////////////////
// Function: Filename::Assignment operator
// Access: Published
// Description: Assigns the filename from a wide-character string.
// The string is run through a TextEncoder configured
// with the current filesystem encoding (see
// get_filesystem_encoding()), and the encoded result
// is passed on to the string assignment operator.
////////////////////////////////////////////////////////////////////
INLINE Filename &Filename::
operator = (const wstring &filename) {
  TextEncoder converter;
  converter.set_encoding(get_filesystem_encoding());
  converter.set_wtext(filename);

  // Hand the encoded form to the ordinary string assignment.
  const string encoded = converter.get_text();
  return operator = (encoded);
}
////////////////////////////////////////////////////////////////////
// Function: Filename::Assignment operator
// Access: Published
@ -264,12 +288,25 @@ operator + (const string &other) const {
// Access: Published
// Description: Returns the entire filename: directory, basename,
// extension. This is the same thing returned by the
// string typecast operator, so this function is a
// little redundant.
// string typecast operator.
////////////////////////////////////////////////////////////////////
INLINE string Filename::
get_fullpath() const {
return _filename.c_str();
return _filename;
}
////////////////////////////////////////////////////////////////////
// Function: Filename::get_fullpath_w
// Access: Published
// Description: Returns the same filename as get_fullpath(), but
// expressed as a wide-character string. The
// conversion is performed by a TextEncoder using the
// current filesystem encoding.
////////////////////////////////////////////////////////////////////
INLINE wstring Filename::
get_fullpath_w() const {
  TextEncoder converter;
  converter.set_encoding(get_filesystem_encoding());

  // Feed in the encoded path, then read it back in wide form.
  const string narrow_path = get_fullpath();
  converter.set_text(narrow_path);
  return converter.get_wtext();
}
////////////////////////////////////////////////////////////////////
@ -611,3 +648,29 @@ INLINE void Filename::
output(ostream &out) const {
out << _filename;
}
////////////////////////////////////////////////////////////////////
// Function: Filename::set_filesystem_encoding
// Access: Published, Static
// Description: Specifies the default encoding to be used for all
// subsequent Filenames. This is used to represent
// wide-character (Unicode) filenames internally. On
// non-Windows-based systems, the encoded filename is
// also passed to the underlying operating system.
//
// The value is kept in a static member shared by all
// Filename objects; that member is initialized to
// TextEncoder::E_utf8.
////////////////////////////////////////////////////////////////////
INLINE void Filename::
set_filesystem_encoding(TextEncoder::Encoding encoding) {
_filesystem_encoding = encoding;
}
////////////////////////////////////////////////////////////////////
// Function: Filename::get_filesystem_encoding
// Access: Published, Static
// Description: Returns the default encoding currently in effect
// for all Filename objects. See
// set_filesystem_encoding().
////////////////////////////////////////////////////////////////////
INLINE TextEncoder::Encoding Filename::
get_filesystem_encoding() {
return _filesystem_encoding;
}

View File

@ -49,6 +49,8 @@
#include <unistd.h>
#endif
TextEncoder::Encoding Filename::_filesystem_encoding = TextEncoder::E_utf8;
Filename *Filename::_home_directory;
Filename *Filename::_temp_directory;
Filename *Filename::_user_appdata_directory;
@ -408,6 +410,21 @@ from_os_specific(const string &os_specific, Filename::Type type) {
#endif // WIN32
}
////////////////////////////////////////////////////////////////////
// Function: Filename::from_os_specific_w
// Access: Published, Static
// Description: Wide-character counterpart of from_os_specific().
// The os-specific wide string is first encoded with
// the current filesystem encoding, and the encoded
// string is then passed, along with the given type,
// to from_os_specific() to build the new Filename.
////////////////////////////////////////////////////////////////////
Filename Filename::
from_os_specific_w(const wstring &os_specific, Filename::Type type) {
  TextEncoder converter;
  converter.set_encoding(get_filesystem_encoding());
  converter.set_wtext(os_specific);

  // The narrow overload does the real os-specific parsing.
  const string encoded = converter.get_text();
  return from_os_specific(encoded, type);
}
////////////////////////////////////////////////////////////////////
// Function: Filename::expand_from
// Access: Published, Static
@ -1213,6 +1230,19 @@ to_os_specific() const {
#endif // WIN32
}
////////////////////////////////////////////////////////////////////
// Function: Filename::to_os_specific_w
// Access: Published
// Description: The wide-string variant of to_os_specific(). The
// result of to_os_specific() is decoded into a
// wide-character string using the current filesystem
// encoding.
////////////////////////////////////////////////////////////////////
wstring Filename::
to_os_specific_w() const {
  TextEncoder converter;
  converter.set_encoding(get_filesystem_encoding());

  // Start from the narrow os-specific form and widen it.
  const string os_name = to_os_specific();
  converter.set_text(os_name);
  return converter.get_wtext();
}
////////////////////////////////////////////////////////////////////
// Function: Filename::to_os_generic
// Access: Published

View File

@ -20,6 +20,7 @@
#include "typeHandle.h"
#include "register_type.h"
#include "vector_string.h"
#include "textEncoder.h"
#include <assert.h>
@ -61,6 +62,7 @@ public:
PUBLISHED:
INLINE Filename(const string &filename = "");
INLINE Filename(const wstring &filename);
INLINE Filename(const char *filename);
INLINE Filename(const Filename &copy);
Filename(const Filename &dirname, const Filename &basename);
@ -82,6 +84,8 @@ PUBLISHED:
static Filename from_os_specific(const string &os_specific,
Type type = T_general);
static Filename from_os_specific_w(const wstring &os_specific,
Type type = T_general);
static Filename expand_from(const string &user_string,
Type type = T_general);
static Filename temporary(const string &dirname, const string &prefix,
@ -95,6 +99,7 @@ PUBLISHED:
// Assignment is via the = operator.
INLINE Filename &operator = (const string &filename);
INLINE Filename &operator = (const wstring &filename);
INLINE Filename &operator = (const char *filename);
INLINE Filename &operator = (const Filename &copy);
@ -111,6 +116,7 @@ PUBLISHED:
// Or, you can use any of these.
INLINE string get_fullpath() const;
INLINE wstring get_fullpath_w() const;
INLINE string get_dirname() const;
INLINE string get_basename() const;
INLINE string get_fullpath_wo_extension() const;
@ -161,6 +167,7 @@ PUBLISHED:
bool make_true_case();
string to_os_specific() const;
wstring to_os_specific_w() const;
string to_os_generic() const;
string to_os_short_name() const;
string to_os_long_name() const;
@ -220,6 +227,9 @@ PUBLISHED:
INLINE void output(ostream &out) const;
INLINE static void set_filesystem_encoding(TextEncoder::Encoding encoding);
INLINE static TextEncoder::Encoding get_filesystem_encoding();
public:
bool atomic_compare_and_exchange_contents(string &orig_contents, const string &old_contents, const string &new_contents) const;
bool atomic_read_contents(string &contents) const;
@ -244,6 +254,7 @@ protected:
int _flags;
static TextEncoder::Encoding _filesystem_encoding;
static Filename *_home_directory;
static Filename *_temp_directory;
static Filename *_user_appdata_directory;

View File

@ -13,7 +13,9 @@
////////////////////////////////////////////////////////////////////
#include "stringDecoder.h"
#include "config_express.h"
#include "config_dtoolutil.h"
ostream *StringDecoder::_notify_ptr = &cerr;
////////////////////////////////////////////////////////////////////
// Function: StringDecoder::Destructor
@ -37,6 +39,32 @@ get_next_character() {
return (unsigned char)_input[_p++];
}
////////////////////////////////////////////////////////////////////
// Function: StringDecoder::set_notify_ptr
// Access: Public, Static
// Description: Sets the ostream that is used to write error messages
// to. This is necessary because of the low-level
// placement of this class, before the definition of the
// NotifyCategory class, so it cannot specify its own
// notify.
//
// The pointer initially refers to cerr. The decoders
// test the pointer against NULL before writing, so
// passing NULL suppresses these messages.
////////////////////////////////////////////////////////////////////
void StringDecoder::
set_notify_ptr(ostream *notify_ptr) {
_notify_ptr = notify_ptr;
}
////////////////////////////////////////////////////////////////////
// Function: StringDecoder::get_notify_ptr
// Access: Public, Static
// Description: Returns the ostream that is used to write error messages
// to. See set_notify_ptr(). The result may be NULL if
// set_notify_ptr() was given NULL, so callers should
// check it before writing.
////////////////////////////////////////////////////////////////////
ostream *StringDecoder::
get_notify_ptr() {
return _notify_ptr;
}
/*
In UTF-8, each 16-bit Unicode character is encoded as a sequence of
one, two, or three 8-bit bytes, depending on the value of the
@ -79,8 +107,10 @@ get_next_character() {
// First byte of two.
unsigned int two = 0;
if (test_eof()) {
express_cat.warning()
<< "utf-8 encoded string '" << _input << "' ends abruptly.\n";
if (_notify_ptr != NULL) {
(*_notify_ptr)
<< "utf-8 encoded string '" << _input << "' ends abruptly.\n";
}
return -1;
}
two = (unsigned char)_input[_p++];
@ -90,14 +120,18 @@ get_next_character() {
} else if ((result & 0xf0) == 0xe0) {
// First byte of three.
if (test_eof()) {
express_cat.warning()
<< "utf-8 encoded string '" << _input << "' ends abruptly.\n";
if (_notify_ptr != NULL) {
(*_notify_ptr)
<< "utf-8 encoded string '" << _input << "' ends abruptly.\n";
}
return -1;
}
unsigned int two = (unsigned char)_input[_p++];
if (test_eof()) {
express_cat.warning()
<< "utf-8 encoded string '" << _input << "' ends abruptly.\n";
if (_notify_ptr != NULL) {
(*_notify_ptr)
<< "utf-8 encoded string '" << _input << "' ends abruptly.\n";
}
return -1;
}
unsigned int three = (unsigned char)_input[_p++];
@ -107,10 +141,12 @@ get_next_character() {
// Otherwise--the high bit is set but it is not one of the
// introductory utf-8 bytes--we have an error.
express_cat.warning()
<< "Non utf-8 byte in string: 0x" << hex << result << dec
<< ", string is '" << _input << "'\n";
nassertr(false, -1);
if (_notify_ptr != NULL) {
(*_notify_ptr)
<< "Non utf-8 byte in string: 0x" << hex << result << dec
<< ", string is '" << _input << "'\n";
}
return -1;
}
// End of string reached.
@ -130,8 +166,10 @@ get_next_character() {
unsigned int high = (unsigned char)_input[_p++];
if (test_eof()) {
express_cat.warning()
<< "Unicode-encoded string has odd number of bytes.\n";
if (_notify_ptr != NULL) {
(*_notify_ptr)
<< "Unicode-encoded string has odd number of bytes.\n";
}
return -1;
}
unsigned int low = (unsigned char)_input[_p++];

View File

@ -15,8 +15,7 @@
#ifndef STRINGDECODER_H
#define STRINGDECODER_H
#include "pandabase.h"
#include "dtoolbase.h"
////////////////////////////////////////////////////////////////////
// Class : StringDecoder
@ -34,12 +33,16 @@ public:
virtual int get_next_character();
INLINE bool is_eof();
static void set_notify_ptr(ostream *ptr);
static ostream *get_notify_ptr();
protected:
INLINE bool test_eof();
string _input;
size_t _p;
bool _eof;
static ostream *_notify_ptr;
};
////////////////////////////////////////////////////////////////////

View File

@ -231,8 +231,10 @@ get_num_chars() const {
INLINE int TextEncoder::
get_unicode_char(int index) const {
get_wtext();
nassertr(index >= 0 && index < (int)_wtext.length(), 0);
return _wtext[index];
if (index >= 0 && index < (int)_wtext.length()) {
return _wtext[index];
}
return 0;
}
////////////////////////////////////////////////////////////////////
@ -246,9 +248,10 @@ get_unicode_char(int index) const {
INLINE void TextEncoder::
set_unicode_char(int index, int character) {
get_wtext();
nassertv(index >= 0 && index < (int)_wtext.length());
_wtext[index] = character;
_flags &= ~F_got_text;
if (index >= 0 && index < (int)_wtext.length()) {
_wtext[index] = character;
_flags &= ~F_got_text;
}
}
////////////////////////////////////////////////////////////////////

View File

@ -15,13 +15,9 @@
#include "textEncoder.h"
#include "stringDecoder.h"
#include "unicodeLatinMap.h"
#include "config_express.h"
#include "config_dtoolutil.h"
TypeHandle TextEncoder::_type_handle;
ConfigVariableEnum<TextEncoder::Encoding> TextEncoder::_default_encoding
("text-encoding", TextEncoder::E_iso8859,
PRC_DESC("Specifies how international characters are represented in strings "
"of 8-byte characters presented to Panda. See TextEncoder::set_encoding()."));
TextEncoder::Encoding TextEncoder::_default_encoding = TextEncoder::E_iso8859;
////////////////////////////////////////////////////////////////////
// Function: TextEncoder::make_upper
@ -379,10 +375,26 @@ operator >> (istream &in, TextEncoder::Encoding &encoding) {
} else if (word == "unicode") {
encoding = TextEncoder::E_unicode;
} else {
express_cat.error()
<< "Invalid TextEncoder::Encoding: " << word << "\n";
ostream *notify_ptr = StringDecoder::get_notify_ptr();
if (notify_ptr != (ostream *)NULL) {
(*notify_ptr)
<< "Invalid TextEncoder::Encoding: " << word << "\n";
}
encoding = TextEncoder::E_iso8859;
}
return in;
}
////////////////////////////////////////////////////////////////////
// Function: wstring ostream operator
// Description: Writes a wide-character string to an ordinary
// ostream. The string is converted by a
// default-constructed TextEncoder, i.e. using the
// current default encoding, and the encoded text is
// then inserted into the stream.
////////////////////////////////////////////////////////////////////
ostream &
operator << (ostream &out, const wstring &str) {
  TextEncoder converter;
  converter.set_wtext(str);
  return out << converter.get_text();
}

View File

@ -15,10 +15,8 @@
#ifndef TEXTENCODER_H
#define TEXTENCODER_H
#include "pandabase.h"
#include "dtoolbase.h"
#include "unicodeLatinMap.h"
#include "configVariableEnum.h"
#include "typedObject.h"
class StringDecoder;
@ -34,7 +32,7 @@ class StringDecoder;
// This class is also a base class of TextNode, which
// inherits this functionality.
////////////////////////////////////////////////////////////////////
class EXPCL_PANDAEXPRESS TextEncoder {
class EXPCL_DTOOL TextEncoder {
PUBLISHED:
enum Encoding {
E_iso8859,
@ -112,25 +110,19 @@ private:
string _text;
wstring _wtext;
static ConfigVariableEnum<Encoding> _default_encoding;
public:
static TypeHandle get_class_type() {
return _type_handle;
}
static void init_type() {
register_type(_type_handle, "TextEncoder");
}
private:
static TypeHandle _type_handle;
static Encoding _default_encoding;
};
EXPCL_PANDAEXPRESS ostream &
EXPCL_DTOOL ostream &
operator << (ostream &out, TextEncoder::Encoding encoding);
EXPCL_PANDAEXPRESS istream &
EXPCL_DTOOL istream &
operator >> (istream &in, TextEncoder::Encoding &encoding);
// We'll define the output operator for wstring here, too. Presumably
// this will not be automatically defined by any system libraries.
EXPCL_DTOOL ostream &
operator << (ostream &out, const wstring &str);
#include "textEncoder.I"
#endif

View File

@ -15,7 +15,7 @@
#ifndef UNICODELATINMAP_H
#define UNICODELATINMAP_H
#include "pandabase.h"
#include "dtoolbase.h"
#include "pmap.h"
////////////////////////////////////////////////////////////////////
@ -30,7 +30,7 @@
// case from upper to lower while retaining the Unicode
// accent marks.
////////////////////////////////////////////////////////////////////
class EXPCL_PANDAEXPRESS UnicodeLatinMap {
class EXPCL_DTOOL UnicodeLatinMap {
public:
enum AccentType {
AT_none,

View File

@ -22,6 +22,8 @@
#include "config_prc.h"
#include "pfstream.h"
#include "pandaSystem.h"
#include "textEncoder.h"
#include "stringDecoder.h"
// This file is generated by ppremake.
#include "prc_parameters.h"
@ -682,4 +684,18 @@ config_initialized() {
panda_sys->set_package_version_string(panda_package_version);
panda_sys->set_package_host_url(panda_package_host_url);
#endif // NDEBUG
// Also set up some other low-level things.
ConfigVariableEnum<TextEncoder::Encoding> text_encoding
("text-encoding", TextEncoder::E_iso8859,
PRC_DESC("Specifies how international characters are represented in strings "
"of 8-byte characters presented to Panda. See TextEncoder::set_encoding()."));
TextEncoder::set_default_encoding(text_encoding);
ConfigVariableEnum<TextEncoder::Encoding> filesystem_encoding
("filesystem-encoding", TextEncoder::E_utf8,
PRC_DESC("Specifies the default encoding used for wide-character filenames."));
Filename::set_filesystem_encoding(filesystem_encoding);
StringDecoder::set_notify_ptr(&Notify::out());
}

View File

@ -14,6 +14,7 @@
#include "config_prc.h"
#include "configVariableBool.h"
#include "configVariableEnum.h"
#include "pandaFileStreamBuf.h"
NotifyCategoryDef(prc, "");

View File

@ -52,16 +52,13 @@
pta_uchar.h pta_float.h \
ramfile.I ramfile.h \
referenceCount.I referenceCount.h \
stringDecoder.h stringDecoder.I \
subStream.I subStream.h subStreamBuf.h \
subfileInfo.h subfileInfo.I \
temporaryFile.h temporaryFile.I \
textEncoder.h textEncoder.I \
threadSafePointerTo.I threadSafePointerTo.h \
threadSafePointerToBase.I threadSafePointerToBase.h \
trueClock.I trueClock.h \
typedReferenceCount.I typedReferenceCount.h typedef.h \
unicodeLatinMap.h \
vector_uchar.h vector_float.h \
virtualFile.I virtualFileList.I virtualFileList.h virtualFileMount.h \
virtualFileComposite.h virtualFileComposite.I virtualFile.h \
@ -109,16 +106,13 @@
pta_uchar.cxx pta_float.cxx \
ramfile.cxx \
referenceCount.cxx \
stringDecoder.cxx \
subStream.cxx subStreamBuf.cxx \
subfileInfo.cxx \
temporaryFile.cxx \
textEncoder.cxx \
threadSafePointerTo.cxx \
threadSafePointerToBase.cxx \
trueClock.cxx \
typedReferenceCount.cxx \
unicodeLatinMap.cxx \
vector_uchar.cxx vector_float.cxx \
virtualFileComposite.cxx virtualFile.cxx virtualFileList.cxx \
virtualFileMount.cxx \
@ -175,16 +169,13 @@
pta_uchar.h pta_float.h \
ramfile.I ramfile.h \
referenceCount.I referenceCount.h \
stringDecoder.h stringDecoder.I \
subStream.I subStream.h subStreamBuf.h \
subfileInfo.h subfileInfo.I \
temporaryFile.h temporaryFile.I \
textEncoder.h textEncoder.I \
threadSafePointerTo.I threadSafePointerTo.h \
threadSafePointerToBase.I threadSafePointerToBase.h \
trueClock.I trueClock.h \
typedReferenceCount.I typedReferenceCount.h typedef.h \
unicodeLatinMap.h \
vector_uchar.h vector_float.h \
virtualFile.I virtualFileList.I virtualFileList.h virtualFileMount.h \
virtualFileComposite.h virtualFileComposite.I virtualFile.h \

View File

@ -2,6 +2,7 @@ forcetype PandaSystem
forcetype DSearchPath
forcetype DSearchPath::Results
forcetype ExecutionEnvironment
forcetype TextEncoder
forcetype Filename
forcetype GlobPattern
forcetype Notify

View File

@ -100,7 +100,6 @@ init_libexpress() {
Namable::init_type();
NodeReferenceCount::init_type();
ReferenceCount::init_type();
TextEncoder::init_type();
TypedObject::init_type();
TypedReferenceCount::init_type();
VirtualFile::init_type();

View File

@ -3,17 +3,14 @@
#include "pta_float.cxx"
#include "ramfile.cxx"
#include "referenceCount.cxx"
#include "stringDecoder.cxx"
#include "subfileInfo.cxx"
#include "subStream.cxx"
#include "subStreamBuf.cxx"
#include "temporaryFile.cxx"
#include "textEncoder.cxx"
#include "threadSafePointerTo.cxx"
#include "threadSafePointerToBase.cxx"
#include "trueClock.cxx"
#include "typedReferenceCount.cxx"
#include "unicodeLatinMap.cxx"
#include "vector_uchar.cxx"
#include "vector_float.cxx"
#include "virtualFile.cxx"

View File

@ -327,11 +327,9 @@ public:
}
static void init_type() {
PandaNode::init_type();
TextEncoder::init_type();
TextProperties::init_type();
register_type(_type_handle, "TextNode",
PandaNode::get_class_type(),
TextEncoder::get_class_type(),
TextProperties::get_class_type());
}
virtual TypeHandle get_type() const {