prc: StreamReader should hold GIL for PyBytes_FromStringAndSize

This is due to python/cpython#21074, which causes a segfault in the latest Python master when creating a bytes object of size 0.

readlines() has been reimplemented to use a C++ vector in order to prevent constantly re-locking and unlocking the GIL for every line.
This commit is contained in:
rdb 2020-09-12 22:53:37 +02:00
parent 2402594808
commit 2b3a1d9d73
3 changed files with 67 additions and 16 deletions

View File

@ -66,10 +66,10 @@ PUBLISHED:
BLOCKING void skip_bytes(size_t size);
BLOCKING size_t extract_bytes(unsigned char *into, size_t size);
EXTENSION(BLOCKING PyObject *extract_bytes(size_t size));
EXTENSION(PyObject *extract_bytes(size_t size));
EXTENSION(BLOCKING PyObject *readline());
EXTENSION(BLOCKING PyObject *readlines());
EXTENSION(PyObject *readline());
EXTENSION(PyObject *readlines());
public:
BLOCKING vector_uchar extract_bytes(size_t size);

View File

@ -15,6 +15,8 @@
#ifdef HAVE_PYTHON
#include "vector_string.h"
/**
* Extracts the indicated number of bytes in the stream and returns them as a
* string (or bytes, in Python 3). Returns empty string at end-of-file.
@ -23,13 +25,25 @@ PyObject *Extension<StreamReader>::
extract_bytes(size_t size) {
std::istream *in = _this->get_istream();
if (in->eof() || in->fail() || size == 0) {
// Note that this is only safe to call with size 0 while the GIL is held.
return PyBytes_FromStringAndSize(nullptr, 0);
}
PyObject *bytes = PyBytes_FromStringAndSize(nullptr, size);
in->read(PyBytes_AS_STRING(bytes), size);
char *buffer = (char *)PyBytes_AS_STRING(bytes);
#if defined(HAVE_THREADS) && !defined(SIMPLE_THREADS)
PyThreadState *_save;
Py_UNBLOCK_THREADS
#endif // HAVE_THREADS && !SIMPLE_THREADS
in->read(buffer, size);
size_t read_bytes = in->gcount();
#if defined(HAVE_THREADS) && !defined(SIMPLE_THREADS)
Py_BLOCK_THREADS
#endif // HAVE_THREADS && !SIMPLE_THREADS
if (read_bytes == size || _PyBytes_Resize(&bytes, read_bytes) == 0) {
return bytes;
} else {
@ -47,6 +61,11 @@ extract_bytes(size_t size) {
*/
PyObject *Extension<StreamReader>::
readline() {
#if defined(HAVE_THREADS) && !defined(SIMPLE_THREADS)
PyThreadState *_save;
Py_UNBLOCK_THREADS
#endif // HAVE_THREADS && !SIMPLE_THREADS
std::istream *in = _this->get_istream();
std::string line;
@ -60,6 +79,10 @@ readline() {
ch = in->get();
}
#if defined(HAVE_THREADS) && !defined(SIMPLE_THREADS)
Py_BLOCK_THREADS
#endif // HAVE_THREADS && !SIMPLE_THREADS
#if PY_MAJOR_VERSION >= 3
return PyBytes_FromStringAndSize(line.data(), line.size());
#else
@ -73,22 +96,50 @@ readline() {
*/
PyObject *Extension<StreamReader>::
readlines() {
PyObject *lst = PyList_New(0);
#if defined(HAVE_THREADS) && !defined(SIMPLE_THREADS)
PyThreadState *_save;
Py_UNBLOCK_THREADS
#endif // HAVE_THREADS && !SIMPLE_THREADS
std::istream *in = _this->get_istream();
vector_string lines;
while (true) {
std::string line;
int ch = in->get();
while (ch != EOF && !in->fail()) {
line += ch;
if (ch == '\n' || in->eof()) {
// Here's the newline character.
break;
}
ch = in->get();
}
if (line.empty()) {
break;
}
lines.push_back(std::move(line));
}
#if defined(HAVE_THREADS) && !defined(SIMPLE_THREADS)
Py_BLOCK_THREADS
#endif // HAVE_THREADS && !SIMPLE_THREADS
PyObject *lst = PyList_New(lines.size());
if (lst == nullptr) {
return nullptr;
}
PyObject *py_line = readline();
Py_ssize_t i = 0;
for (const std::string &line : lines) {
#if PY_MAJOR_VERSION >= 3
while (PyBytes_GET_SIZE(py_line) > 0) {
PyObject *py_line = PyBytes_FromStringAndSize(line.data(), line.size());
#else
while (PyString_GET_SIZE(py_line) > 0) {
PyObject *py_line = PyString_FromStringAndSize(line.data(), line.size());
#endif
PyList_Append(lst, py_line);
Py_DECREF(py_line);
py_line = readline();
PyList_SET_ITEM(lst, i++, py_line);
}
return lst;

View File

@ -29,9 +29,9 @@
template<>
class Extension<StreamReader> : public ExtensionBase<StreamReader> {
public:
BLOCKING PyObject *extract_bytes(size_t size);
BLOCKING PyObject *readline();
BLOCKING PyObject *readlines();
PyObject *extract_bytes(size_t size);
PyObject *readline();
PyObject *readlines();
};
#endif // HAVE_PYTHON