dtoolutil: fix TextEncoder methods for Python 3

The no-argument get_text() and set_text() now return and accept Unicode strings in Python 3, while the overloads that take an encoding return and accept bytes objects.

In Python 2, they all work with regular strings, but the no-argument set_text() (and append_text()) also accept Unicode objects.

In the future we probably want to remove most of this interface for Python users, for whom all this is unnecessary since it duplicates functionality already in the standard library.
This commit is contained in:
rdb 2018-10-08 22:13:08 +02:00
parent 96860b88e0
commit 9061fd9416
11 changed files with 305 additions and 96 deletions

View File

@ -1,2 +1,3 @@
#include "filename_ext.cxx"
#include "globPattern_ext.cxx"
#include "textEncoder_ext.cxx"

View File

@ -90,6 +90,7 @@ set_text(const std::string &text) {
if (!has_text() || _text != text) {
_text = text;
_flags = (_flags | F_got_text) & ~F_got_wtext;
text_changed();
}
}
@ -101,7 +102,11 @@ set_text(const std::string &text) {
*/
INLINE void TextEncoder::
set_text(const std::string &text, TextEncoder::Encoding encoding) {
set_wtext(decode_text(text, encoding));
if (encoding == _encoding) {
set_text(text);
} else {
set_wtext(decode_text(text, encoding));
}
}
/**
@ -112,6 +117,7 @@ clear_text() {
_text = std::string();
_wtext = std::wstring();
_flags |= (F_got_text | F_got_wtext);
text_changed();
}
/**
@ -151,8 +157,11 @@ get_text(TextEncoder::Encoding encoding) const {
*/
INLINE void TextEncoder::
append_text(const std::string &text) {
_text = get_text() + text;
_flags = (_flags | F_got_text) & ~F_got_wtext;
if (!text.empty()) {
_text = get_text() + text;
_flags = (_flags | F_got_text) & ~F_got_wtext;
text_changed();
}
}
/**
@ -163,6 +172,7 @@ INLINE void TextEncoder::
append_unicode_char(int character) {
_wtext = get_wtext() + std::wstring(1, (wchar_t)character);
_flags = (_flags | F_got_wtext) & ~F_got_text;
text_changed();
}
/**
@ -200,6 +210,7 @@ set_unicode_char(size_t index, int character) {
if (index < _wtext.length()) {
_wtext[index] = character;
_flags &= ~F_got_text;
text_changed();
}
}
@ -418,6 +429,7 @@ set_wtext(const std::wstring &wtext) {
if (!has_text() || _wtext != wtext) {
_wtext = wtext;
_flags = (_flags | F_got_wtext) & ~F_got_text;
text_changed();
}
}
@ -439,8 +451,11 @@ get_wtext() const {
*/
INLINE void TextEncoder::
append_wtext(const std::wstring &wtext) {
_wtext = get_wtext() + wtext;
_flags = (_flags | F_got_wtext) & ~F_got_text;
if (!wtext.empty()) {
_wtext = get_wtext() + wtext;
_flags = (_flags | F_got_wtext) & ~F_got_text;
text_changed();
}
}
/**

View File

@ -35,6 +35,7 @@ make_upper() {
(*si) = unicode_toupper(*si);
}
_flags &= ~F_got_text;
text_changed();
}
/**
@ -49,6 +50,7 @@ make_lower() {
(*si) = unicode_tolower(*si);
}
_flags &= ~F_got_text;
text_changed();
}
/**
@ -314,6 +316,12 @@ expand_amp_sequence(StringDecoder &decoder) const {
}
*/
/**
 * Called whenever the text has been changed.  The base-class implementation
 * is intentionally empty; it exists as a virtual hook so that a derived class
 * can react to a change of the stored text.
 */
void TextEncoder::
text_changed() {
}
/**
*

View File

@ -48,17 +48,28 @@ PUBLISHED:
INLINE static Encoding get_default_encoding();
MAKE_PROPERTY(default_encoding, get_default_encoding, set_default_encoding);
#ifdef CPPPARSER
EXTEND void set_text(PyObject *text);
EXTEND void set_text(PyObject *text, Encoding encoding);
#else
INLINE void set_text(const std::string &text);
INLINE void set_text(const std::string &text, Encoding encoding);
#endif
INLINE void clear_text();
INLINE bool has_text() const;
void make_upper();
void make_lower();
#ifdef CPPPARSER
EXTEND PyObject *get_text() const;
EXTEND PyObject *get_text(Encoding encoding) const;
EXTEND void append_text(PyObject *text);
#else
INLINE std::string get_text() const;
INLINE std::string get_text(Encoding encoding) const;
INLINE void append_text(const std::string &text);
#endif
INLINE void append_unicode_char(int character);
INLINE size_t get_num_chars() const;
INLINE int get_unicode_char(size_t index) const;
@ -91,11 +102,24 @@ PUBLISHED:
std::wstring get_wtext_as_ascii() const;
bool is_wtext() const;
#ifdef CPPPARSER
EXTEND static PyObject *encode_wchar(wchar_t ch, Encoding encoding);
EXTEND INLINE PyObject *encode_wtext(const std::wstring &wtext) const;
EXTEND static PyObject *encode_wtext(const std::wstring &wtext, Encoding encoding);
EXTEND INLINE PyObject *decode_text(PyObject *text) const;
EXTEND static PyObject *decode_text(PyObject *text, Encoding encoding);
#else
static std::string encode_wchar(wchar_t ch, Encoding encoding);
INLINE std::string encode_wtext(const std::wstring &wtext) const;
static std::string encode_wtext(const std::wstring &wtext, Encoding encoding);
INLINE std::wstring decode_text(const std::string &text) const;
static std::wstring decode_text(const std::string &text, Encoding encoding);
#endif
MAKE_PROPERTY(text, get_text, set_text);
protected:
virtual void text_changed();
private:
enum Flags {

View File

@ -0,0 +1,30 @@
/**
* PANDA 3D SOFTWARE
* Copyright (c) Carnegie Mellon University. All rights reserved.
*
* All use of this software is subject to the terms of the revised BSD
* license. You should have received a copy of this license along
* with this source code in a file named "LICENSE."
*
* @file textEncoder_ext.I
* @author rdb
* @date 2018-10-08
*/
/**
 * Convenience overload: encodes the given wide-character string into a byte
 * string using the encoding currently configured on this TextEncoder, by
 * forwarding to the two-argument encode_wtext().
 */
INLINE PyObject *Extension<TextEncoder>::
encode_wtext(const std::wstring &wtext) const {
  const TextEncoder::Encoding current_encoding = _this->get_encoding();
  return encode_wtext(wtext, current_encoding);
}
/**
 * Convenience overload: decodes the given encoded byte string into a
 * wide-character string using the encoding currently configured on this
 * TextEncoder, by forwarding to the two-argument decode_text().
 */
INLINE PyObject *Extension<TextEncoder>::
decode_text(PyObject *text) const {
  const TextEncoder::Encoding current_encoding = _this->get_encoding();
  return decode_text(text, current_encoding);
}

View File

@ -0,0 +1,159 @@
/**
* PANDA 3D SOFTWARE
* Copyright (c) Carnegie Mellon University. All rights reserved.
*
* All use of this software is subject to the terms of the revised BSD
* license. You should have received a copy of this license along
* with this source code in a file named "LICENSE."
*
* @file textEncoder_ext.cxx
* @author rdb
* @date 2018-09-29
*/
#include "textEncoder_ext.h"
#ifdef HAVE_PYTHON
/**
 * Sets the text from a Python string object.
 *
 * A unicode object is always accepted.  On Python 3.3 and above it is
 * converted to UTF-8 and handed to TextEncoder::set_text() with an explicit
 * E_utf8 encoding; on older versions it is copied out as wide characters and
 * stored via set_wtext().
 *
 * In Python 2, a regular str is also accepted, and is assumed to already be
 * in the TextEncoder's specified encoding.  In Python 3, any non-unicode
 * argument raises a TypeError.
 *
 * If the conversion fails, the text is left unchanged and the Python
 * exception raised by the conversion function remains set.
 */
void Extension<TextEncoder>::
set_text(PyObject *text) {
  if (PyUnicode_Check(text)) {
#if PY_VERSION_HEX >= 0x03030000
    Py_ssize_t len;
    const char *str = PyUnicode_AsUTF8AndSize(text, &len);
    if (str == nullptr) {
      // The string could not be represented as UTF-8 (or memory ran out).
      // Constructing a std::string from a null pointer would be undefined
      // behavior, so bail out and let the pending exception propagate.
      return;
    }
    _this->set_text(std::string(str, len), TextEncoder::E_utf8);
#else
    // Legacy path: copy the characters into a stack buffer of wchar_t.
    Py_ssize_t len = PyUnicode_GET_SIZE(text);
    wchar_t *str = (wchar_t *)alloca(sizeof(wchar_t) * (len + 1));
    PyUnicode_AsWideChar((PyUnicodeObject *)text, str, len);
    _this->set_wtext(std::wstring(str, len));
#endif
  } else {
#if PY_MAJOR_VERSION >= 3
    Dtool_Raise_TypeError("expected string");
#else
    char *str;
    Py_ssize_t len;
    if (PyString_AsStringAndSize(text, &str, &len) != -1) {
      _this->set_text(std::string(str, len));
    }
#endif
  }
}
/**
 * Sets the text from a bytes object whose contents are encoded with the
 * given encoding.  If the bytes cannot be extracted from the argument, the
 * text is left untouched.
 */
void Extension<TextEncoder>::
set_text(PyObject *text, TextEncoder::Encoding encoding) {
  char *data;
  Py_ssize_t size;
  if (PyBytes_AsStringAndSize(text, &data, &size) < 0) {
    // Extraction failed; nothing to store.
    return;
  }

  const std::string encoded(data, size);
  _this->set_text(encoded, encoding);
}
/**
 * Returns the stored text as a Python string.  In Python 3 this is a unicode
 * object built from the wide-character text; in Python 2 it is a regular str
 * holding the text in the TextEncoder's specified encoding.
 */
PyObject *Extension<TextEncoder>::
get_text() const {
#if PY_MAJOR_VERSION >= 3
  const std::wstring wide = _this->get_wtext();
  const Py_ssize_t num_chars = (Py_ssize_t)wide.size();
  return PyUnicode_FromWideChar(wide.data(), num_chars);
#else
  const std::string encoded = _this->get_text();
  const Py_ssize_t num_bytes = (Py_ssize_t)encoded.size();
  return PyString_FromStringAndSize(encoded.data(), num_bytes);
#endif
}
/**
 * Returns the stored text, encoded with the given encoding, as a bytes
 * object (a regular str in Python 2).
 */
PyObject *Extension<TextEncoder>::
get_text(TextEncoder::Encoding encoding) const {
  const std::string encoded = _this->get_text(encoding);
  const char *bytes = encoded.data();
  const Py_ssize_t num_bytes = (Py_ssize_t)encoded.size();

#if PY_MAJOR_VERSION >= 3
  return PyBytes_FromStringAndSize(bytes, num_bytes);
#else
  return PyString_FromStringAndSize(bytes, num_bytes);
#endif
}
/**
 * Appends the given Python string to the end of the stored text.
 *
 * A unicode object is always accepted.  On Python 3.3 and above it is first
 * converted to UTF-8.  Those bytes can only be appended directly to the
 * encoded text when the TextEncoder's own encoding is UTF-8; for any other
 * encoding they are decoded to wide characters and appended with
 * append_wtext(), so that the stored text never mixes two encodings.  On
 * older Python versions the unicode object is copied out as wide characters
 * and appended with append_wtext().
 *
 * In Python 2, a regular str is also accepted, and is assumed to already be
 * in the TextEncoder's specified encoding.  In Python 3, any non-unicode
 * argument raises a TypeError.
 *
 * If the conversion fails, the text is left unchanged and the Python
 * exception raised by the conversion function remains set.
 */
void Extension<TextEncoder>::
append_text(PyObject *text) {
  if (PyUnicode_Check(text)) {
#if PY_VERSION_HEX >= 0x03030000
    Py_ssize_t len;
    const char *str = PyUnicode_AsUTF8AndSize(text, &len);
    if (str == nullptr) {
      // The string could not be represented as UTF-8 (or memory ran out).
      // Constructing a std::string from a null pointer would be undefined
      // behavior, so bail out and let the pending exception propagate.
      return;
    }
    const std::string utf8_text(str, len);
    if (_this->get_encoding() == TextEncoder::E_utf8) {
      _this->append_text(utf8_text);
    } else {
      // The single-argument append_text() concatenates onto the text in the
      // encoder's own encoding, so go through the wide representation.
      _this->append_wtext(TextEncoder::decode_text(utf8_text, TextEncoder::E_utf8));
    }
#else
    // Legacy path: copy the characters into a stack buffer of wchar_t.
    Py_ssize_t len = PyUnicode_GET_SIZE(text);
    wchar_t *str = (wchar_t *)alloca(sizeof(wchar_t) * (len + 1));
    PyUnicode_AsWideChar((PyUnicodeObject *)text, str, len);
    _this->append_wtext(std::wstring(str, len));
#endif
  } else {
#if PY_MAJOR_VERSION >= 3
    Dtool_Raise_TypeError("expected string");
#else
    char *str;
    Py_ssize_t len;
    if (PyString_AsStringAndSize(text, &str, &len) != -1) {
      _this->append_text(std::string(str, len));
    }
#endif
  }
}
/**
 * Encodes a single wide character with the given encoding and returns the
 * result as a bytes object (a regular str in Python 2).
 */
PyObject *Extension<TextEncoder>::
encode_wchar(char32_t ch, TextEncoder::Encoding encoding) {
  const std::string encoded = TextEncoder::encode_wchar(ch, encoding);
  const char *bytes = encoded.data();
  const Py_ssize_t num_bytes = (Py_ssize_t)encoded.size();

#if PY_MAJOR_VERSION >= 3
  return PyBytes_FromStringAndSize(bytes, num_bytes);
#else
  return PyString_FromStringAndSize(bytes, num_bytes);
#endif
}
/**
 * Encodes a wide-character string into a byte string according to the given
 * encoding, and returns it as a bytes object (a regular str in Python 2).
 */
PyObject *Extension<TextEncoder>::
encode_wtext(const std::wstring &wtext, TextEncoder::Encoding encoding) {
  // The parameter type is spelled std::wstring to match the declaration in
  // the class definition; an unqualified name would depend on a
  // using-declaration being in scope.
  std::string value = TextEncoder::encode_wtext(wtext, encoding);

#if PY_MAJOR_VERSION >= 3
  return PyBytes_FromStringAndSize((char *)value.data(), (Py_ssize_t)value.size());
#else
  return PyString_FromStringAndSize((char *)value.data(), (Py_ssize_t)value.size());
#endif
}
/**
 * Decodes the given bytes object, interpreted according to the given
 * encoding, into a wide-character string and returns it wrapped as a Python
 * object.  Returns nullptr if the bytes cannot be extracted from the
 * argument.
 */
PyObject *Extension<TextEncoder>::
decode_text(PyObject *text, TextEncoder::Encoding encoding) {
  char *data;
  Py_ssize_t size;
  if (PyBytes_AsStringAndSize(text, &data, &size) < 0) {
    // Extraction failed; signal the error to the caller.
    return nullptr;
  }

  const std::string encoded(data, size);
  return Dtool_WrapValue(TextEncoder::decode_text(encoded, encoding));
}
#endif // HAVE_PYTHON

View File

@ -0,0 +1,50 @@
/**
* PANDA 3D SOFTWARE
* Copyright (c) Carnegie Mellon University. All rights reserved.
*
* All use of this software is subject to the terms of the revised BSD
* license. You should have received a copy of this license along
* with this source code in a file named "LICENSE."
*
* @file textEncoder_ext.h
* @author rdb
* @date 2018-09-29
*/
#ifndef TEXTENCODER_EXT_H
#define TEXTENCODER_EXT_H
#include "dtoolbase.h"
#ifdef HAVE_PYTHON
#include "extension.h"
#include "textEncoder.h"
#include "py_panda.h"
/**
 * This class defines the extension methods for TextEncoder, which are called
 * instead of any C++ methods with the same prototype.
 *
 * The methods exchange text with Python as PyObject pointers rather than as
 * std::string / std::wstring, so that the Python bindings can pick between
 * unicode and bytes objects explicitly.
 */
template<>
class Extension<TextEncoder> : public ExtensionBase<TextEncoder> {
public:
// Setters: the one-argument form takes a Python string; the two-argument
// form takes a bytes object encoded with the given encoding.
void set_text(PyObject *text);
void set_text(PyObject *text, TextEncoder::Encoding encoding);
// Getters: the no-argument form returns a Python string; the form taking an
// encoding returns a bytes object in that encoding.
PyObject *get_text() const;
PyObject *get_text(TextEncoder::Encoding encoding) const;
// Appends a Python string to the end of the stored text.
void append_text(PyObject *text);
// Encoding helpers; these return the encoded result as a bytes object.  The
// INLINE overload uses the encoder's current encoding.
static PyObject *encode_wchar(char32_t ch, TextEncoder::Encoding encoding);
INLINE PyObject *encode_wtext(const std::wstring &wtext) const;
static PyObject *encode_wtext(const std::wstring &wtext, TextEncoder::Encoding encoding);
// Decoding helpers; these take a bytes object and return the decoded wide
// text.  The INLINE overload uses the encoder's current encoding.
INLINE PyObject *decode_text(PyObject *text) const;
static PyObject *decode_text(PyObject *text, TextEncoder::Encoding encoding);
};
#include "textEncoder_ext.I"
#endif // HAVE_PYTHON
#endif // TEXTENCODER_EXT_H

View File

@ -3551,6 +3551,7 @@ IGATEFILES += [
"dSearchPath.h",
"executionEnvironment.h",
"textEncoder.h",
"textEncoder_ext.h",
"filename.h",
"filename_ext.h",
"globPattern.h",

View File

@ -1010,61 +1010,6 @@ clear_glyph_shift() {
invalidate_with_measure();
}
/**
* Changes the text that is displayed under the TextNode.
*/
INLINE void TextNode::
set_text(const std::string &text) {
MutexHolder holder(_lock);
TextEncoder::set_text(text);
invalidate_with_measure();
}
/**
* The two-parameter version of set_text() accepts an explicit encoding; the
* text is immediately decoded and stored as a wide-character string.
* Subsequent calls to get_text() will return the same text re-encoded using
* whichever encoding is specified by set_encoding().
*/
INLINE void TextNode::
set_text(const std::string &text, TextNode::Encoding encoding) {
MutexHolder holder(_lock);
TextEncoder::set_text(text, encoding);
invalidate_with_measure();
}
/**
* Removes the text from the TextNode.
*/
INLINE void TextNode::
clear_text() {
MutexHolder holder(_lock);
TextEncoder::clear_text();
invalidate_with_measure();
}
/**
* Appends the indicates string to the end of the stored text.
*/
INLINE void TextNode::
append_text(const std::string &text) {
MutexHolder holder(_lock);
TextEncoder::append_text(text);
invalidate_with_measure();
}
/**
* Appends a single character to the end of the stored text. This may be a
* wide character, up to 16 bits in Unicode.
*/
INLINE void TextNode::
append_unicode_char(wchar_t character) {
MutexHolder holder(_lock);
TextEncoder::append_unicode_char(character);
invalidate_with_measure();
}
/**
* Returns a string that represents the contents of the text, as it has been
* formatted by wordwrap rules.
@ -1086,28 +1031,6 @@ calc_width(const std::string &line) const {
return calc_width(decode_text(line));
}
/**
* Changes the text that is displayed under the TextNode, with a wide text.
* This automatically sets the string reported by get_text() to the 8-bit
* encoded version of the same string.
*/
INLINE void TextNode::
set_wtext(const std::wstring &wtext) {
MutexHolder holder(_lock);
TextEncoder::set_wtext(wtext);
invalidate_with_measure();
}
/**
* Appends the indicates string to the end of the stored wide-character text.
*/
INLINE void TextNode::
append_wtext(const std::wstring &wtext) {
MutexHolder holder(_lock);
TextEncoder::append_wtext(wtext);
invalidate_with_measure();
}
/**
* Returns a wstring that represents the contents of the text, as it has been
* formatted by wordwrap rules.

View File

@ -319,6 +319,15 @@ get_internal_geom() const {
return do_get_internal_geom();
}
/**
 * Called whenever the text has been changed.  Marks the TextNode as needing
 * to be re-measured by calling invalidate_with_measure().
 */
void TextNode::
text_changed() {
// Take _lock before invalidating; presumably the MutexHolder releases it
// again when it goes out of scope at the end of this function.
MutexHolder holder(_lock);
invalidate_with_measure();
}
/**
* Returns the union of all attributes from SceneGraphReducer::AttribTypes
* that may not safely be applied to the vertices of this node. If this is

View File

@ -182,14 +182,6 @@ PUBLISHED:
INLINE void set_glyph_shift(PN_stdfloat glyph_shift);
INLINE void clear_glyph_shift();
// These methods are inherited from TextEncoder, but we override here so we
// can flag the TextNode as dirty when they have been changed.
INLINE void set_text(const std::string &text);
INLINE void set_text(const std::string &text, Encoding encoding);
INLINE void clear_text();
INLINE void append_text(const std::string &text);
INLINE void append_unicode_char(wchar_t character);
// After the text has been set, you can query this to determine how it will
// be wordwrapped.
INLINE std::string get_wordwrapped_text() const;
@ -203,10 +195,6 @@ PUBLISHED:
bool has_character(wchar_t character) const;
bool is_whitespace(wchar_t character) const;
// Direct support for wide-character strings.
INLINE void set_wtext(const std::wstring &wtext);
INLINE void append_wtext(const std::wstring &text);
INLINE std::wstring get_wordwrapped_wtext() const;
PN_stdfloat calc_width(const std::wstring &line) const;
@ -245,8 +233,6 @@ PUBLISHED:
MAKE_PROPERTY(usage_hint, get_usage_hint, set_usage_hint);
MAKE_PROPERTY(flatten_flags, get_flatten_flags, set_flatten_flags);
MAKE_PROPERTY(text, get_text, set_text);
MAKE_PROPERTY2(font, has_font, get_font, set_font, clear_font);
MAKE_PROPERTY2(small_caps, has_small_caps, get_small_caps,
set_small_caps, clear_small_caps);
@ -281,6 +267,9 @@ PUBLISHED:
set_text_scale, clear_text_scale);
public:
// From parent class TextEncoder;
virtual void text_changed() final;
// From parent class PandaNode
virtual int get_unsafe_to_apply_attribs() const;
virtual void apply_attribs_to_vertices(const AccumulatedAttribs &attribs,