From e9c76328fb1a562e66792e4bf3bb2d8689c00565 Mon Sep 17 00:00:00 2001 From: rdb Date: Wed, 8 Jun 2016 13:00:44 +0200 Subject: [PATCH 1/4] Fix a crash in PythonTask destructor --- panda/src/event/pythonTask.cxx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/panda/src/event/pythonTask.cxx b/panda/src/event/pythonTask.cxx index a5ea42c1a7..3556395c65 100644 --- a/panda/src/event/pythonTask.cxx +++ b/panda/src/event/pythonTask.cxx @@ -68,9 +68,9 @@ PythonTask(PyObject *function, const string &name) : //////////////////////////////////////////////////////////////////// PythonTask:: ~PythonTask() { - Py_DECREF(_function); - Py_DECREF(_args); - Py_DECREF(__dict__); + Py_XDECREF(_function); + Py_XDECREF(_args); + Py_XDECREF(__dict__); Py_XDECREF(_generator); Py_XDECREF(_owner); Py_XDECREF(_upon_death); From b303962e966183c4c3dfc2643b9fc96d083f66f9 Mon Sep 17 00:00:00 2001 From: rdb Date: Thu, 9 Jun 2016 16:48:31 +0200 Subject: [PATCH 2/4] Disable cycle detection in tasks for now, it is way too unstable --- dtool/src/interrogate/interfaceMakerPythonNative.cxx | 5 +++-- panda/src/event/pythonTask.cxx | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/dtool/src/interrogate/interfaceMakerPythonNative.cxx b/dtool/src/interrogate/interfaceMakerPythonNative.cxx index 2686bb1ce6..a1fec26630 100644 --- a/dtool/src/interrogate/interfaceMakerPythonNative.cxx +++ b/dtool/src/interrogate/interfaceMakerPythonNative.cxx @@ -2730,9 +2730,10 @@ write_module_class(ostream &out, Object *obj) { } string gcflag; - if (obj->_protocol_types & Object::PT_python_gc) { + // Disabled for now because it's too unstable. + /*if (obj->_protocol_types & Object::PT_python_gc) { gcflag = " | Py_TPFLAGS_HAVE_GC"; - } + }*/ // long tp_flags; if (has_local_getbuffer) { diff --git a/panda/src/event/pythonTask.cxx b/panda/src/event/pythonTask.cxx index 3556395c65..88070bdb28 100644 --- a/panda/src/event/pythonTask.cxx +++ b/panda/src/event/pythonTask.cxx @@ -68,9 +68,9 @@ PythonTask(PyObject *function, const string &name) : //////////////////////////////////////////////////////////////////// PythonTask:: ~PythonTask() { - Py_XDECREF(_function); - Py_XDECREF(_args); - Py_XDECREF(__dict__); + Py_DECREF(_function); + Py_DECREF(_args); + Py_DECREF(__dict__); Py_XDECREF(_generator); Py_XDECREF(_owner); Py_XDECREF(_upon_death); @@ -359,12 +359,14 @@ __getattr__(PyObject *attr) const { //////////////////////////////////////////////////////////////////// int PythonTask:: __traverse__(visitproc visit, void *arg) { +/* Py_VISIT(_function); Py_VISIT(_args); Py_VISIT(_upon_death); Py_VISIT(_owner); Py_VISIT(__dict__); Py_VISIT(_generator); +*/ return 0; } @@ -375,12 +377,14 @@ __traverse__(visitproc visit, void *arg) { //////////////////////////////////////////////////////////////////// int PythonTask:: __clear__() { +/* Py_CLEAR(_function); Py_CLEAR(_args); Py_CLEAR(_upon_death); Py_CLEAR(_owner); Py_CLEAR(__dict__); Py_CLEAR(_generator); +*/ return 0; } From a1749b2f07b62eccd3a8c5ac105536499c215fa9 Mon Sep 17 00:00:00 2001 From: rdb Date: Thu, 9 Jun 2016 21:48:57 +0200 Subject: [PATCH 3/4] Don't align transform_blend vertex column to 4 byte boundary Somewhat improves performance of CPU vertex animation --- panda/src/egg2pg/eggLoader.cxx | 4 ++-- panda/src/gobj/geomVertexArrayFormat.cxx | 3 ++- panda/src/grutil/rigidBodyCombiner.cxx | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/panda/src/egg2pg/eggLoader.cxx b/panda/src/egg2pg/eggLoader.cxx index d6b41df09a..06afac8aa0 100644 --- a/panda/src/egg2pg/eggLoader.cxx +++ b/panda/src/egg2pg/eggLoader.cxx @@ -2312,8 +2312,8 @@ make_vertex_data(const EggRenderState *render_state, PT(GeomVertexArrayFormat) anim_array_format = new GeomVertexArrayFormat; anim_array_format->add_column - (InternalName::get_transform_blend(), 1, - Geom::NT_uint16, Geom::C_index); + (InternalName::get_transform_blend(), 1, + Geom::NT_uint16, Geom::C_index, 0, 2); temp_format->add_array(anim_array_format); pmap slider_names; diff --git a/panda/src/gobj/geomVertexArrayFormat.cxx b/panda/src/gobj/geomVertexArrayFormat.cxx index be82d262fd..bc139e2335 100644 --- a/panda/src/gobj/geomVertexArrayFormat.cxx +++ b/panda/src/gobj/geomVertexArrayFormat.cxx @@ -403,7 +403,8 @@ align_columns_for_animation() { add_column(column->get_name(), 4, column->get_numeric_type(), column->get_contents(), -1, 16); } else { add_column(column->get_name(), column->get_num_components(), - column->get_numeric_type(), column->get_contents()); + column->get_numeric_type(), column->get_contents(), + -1, column->get_column_alignment()); } } } diff --git a/panda/src/grutil/rigidBodyCombiner.cxx b/panda/src/grutil/rigidBodyCombiner.cxx index 1ae3a0b1a4..e46f65e424 100644 --- a/panda/src/grutil/rigidBodyCombiner.cxx +++ b/panda/src/grutil/rigidBodyCombiner.cxx @@ -244,9 +244,9 @@ convert_vd(const VertexTransform *transform, const GeomVertexData *orig) { PT(GeomVertexFormat) format = new GeomVertexFormat(*orig->get_format()); if (!orig->get_format()->has_column(InternalName::get_transform_blend())) { - PT(GeomVertexArrayFormat) af = - new GeomVertexArrayFormat(InternalName::get_transform_blend(), 1, - Geom::NT_uint16, Geom::C_index); + PT(GeomVertexArrayFormat) af = new GeomVertexArrayFormat(); + af->add_column(InternalName::get_transform_blend(), 1, + Geom::NT_uint16, Geom::C_index, 0, 2); format->add_array(af); } From 0a731d05454a80b44fef104915463dd426afa83e Mon Sep 17 00:00:00 2001 From: rdb Date: Thu, 9 Jun 2016 21:49:26 +0200 Subject: [PATCH 4/4] Improve performance of texture load/store --- panda/src/gobj/texture.cxx | 152 +++++++++++++++++++++++++++------- panda/src/pnmimage/pnmImage.h | 1 + 2 files changed, 122 insertions(+), 31 deletions(-) diff --git a/panda/src/gobj/texture.cxx b/panda/src/gobj/texture.cxx index e61f272d86..fa16fa7ab8 100644 --- a/panda/src/gobj/texture.cxx +++ b/panda/src/gobj/texture.cxx @@ -6003,30 +6003,89 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, if (maxval == 255 && component_width == 1) { // Most common case: one byte per pixel, and the source image - // shows a maxval of 255. No scaling is necessary. - for (int j = y_size-1; j >= 0; j--) { - for (int i = 0; i < x_size; i++) { - if (is_grayscale) { - store_unscaled_byte(p, pnmimage.get_gray_val(i, j)); - } else { - store_unscaled_byte(p, pnmimage.get_blue_val(i, j)); - store_unscaled_byte(p, pnmimage.get_green_val(i, j)); - store_unscaled_byte(p, pnmimage.get_red_val(i, j)); + // maxval of 255. No scaling is necessary. Because this is such a common + // case, we break it out per component for best performance. + switch (num_components) { + case 1: + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + for (int i = 0; i < x_size; i++) { + *p++ = (uchar)PPM_GETB(row[i]); } - if (has_alpha) { - if (img_has_alpha) { - store_unscaled_byte(p, pnmimage.get_alpha_val(i, j)); - } else { - store_unscaled_byte(p, 255); + p += row_skip; + } + break; + + case 2: + if (img_has_alpha) { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + xelval *alpha_row = pnmimage.alpha_row(j); + for (int i = 0; i < x_size; i++) { + *p++ = (uchar)PPM_GETB(row[i]); + *p++ = (uchar)alpha_row[i]; } + p += row_skip; + } + } else { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + for (int i = 0; i < x_size; i++) { + *p++ = (uchar)PPM_GETB(row[i]); + *p++ = (uchar)255; + } + p += row_skip; } } - p += row_skip; + break; + + case 3: + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + for (int i = 0; i < x_size; i++) { + *p++ = (uchar)PPM_GETB(row[i]); + *p++ = (uchar)PPM_GETG(row[i]); + *p++ = (uchar)PPM_GETR(row[i]); + } + p += row_skip; + } + break; + + case 4: + if (img_has_alpha) { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + xelval *alpha_row = pnmimage.alpha_row(j); + for (int i = 0; i < x_size; i++) { + *p++ = (uchar)PPM_GETB(row[i]); + *p++ = (uchar)PPM_GETG(row[i]); + *p++ = (uchar)PPM_GETR(row[i]); + *p++ = (uchar)alpha_row[i]; + } + p += row_skip; + } + } else { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + for (int i = 0; i < x_size; i++) { + *p++ = (uchar)PPM_GETB(row[i]); + *p++ = (uchar)PPM_GETG(row[i]); + *p++ = (uchar)PPM_GETR(row[i]); + *p++ = (uchar)255; + } + p += row_skip; + } + } + break; + + default: + nassertv(num_components >= 1 && num_components <= 4); + break; } } else if (maxval == 65535 && component_width == 2) { - // Another possible case: two bytes per pixel, and the source - // image shows a maxval of 65535. Again, no scaling is necessary. + // Another possible case: two bytes per pixel, and the source image shows + // a maxval of 65535. Again, no scaling is necessary. for (int j = y_size-1; j >= 0; j--) { for (int i = 0; i < x_size; i++) { if (is_grayscale) { @@ -6201,17 +6260,44 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size, const unsigned char *p = &image[idx]; if (component_width == 1) { - for (int j = y_size-1; j >= 0; j--) { - for (int i = 0; i < x_size; i++) { - if (is_grayscale) { - pnmimage.set_gray(i, j, get_unsigned_byte(p)); - } else { - pnmimage.set_blue(i, j, get_unsigned_byte(p)); - pnmimage.set_green(i, j, get_unsigned_byte(p)); - pnmimage.set_red(i, j, get_unsigned_byte(p)); + if (is_grayscale) { + if (has_alpha) { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + xelval *alpha_row = pnmimage.alpha_row(j); + for (int i = 0; i < x_size; i++) { + PPM_PUTB(row[i], *p++); + alpha_row[i] = *p++; + } } - if (has_alpha) { - pnmimage.set_alpha(i, j, get_unsigned_byte(p)); + } else { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + for (int i = 0; i < x_size; i++) { + PPM_PUTB(row[i], *p++); + } + } + } + } else { + if (has_alpha) { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + xelval *alpha_row = pnmimage.alpha_row(j); + for (int i = 0; i < x_size; i++) { + PPM_PUTB(row[i], *p++); + PPM_PUTG(row[i], *p++); + PPM_PUTR(row[i], *p++); + alpha_row[i] = *p++; + } + } + } else { + for (int j = y_size-1; j >= 0; j--) { + xel *row = pnmimage.row(j); + for (int i = 0; i < x_size; i++) { + PPM_PUTB(row[i], *p++); + PPM_PUTG(row[i], *p++); + PPM_PUTR(row[i], *p++); + } } } } @@ -6944,11 +7030,15 @@ compare_images(const PNMImage &a, const PNMImage &b) { int delta = 0; for (int yi = 0; yi < a.get_y_size(); ++yi) { + xel *a_row = a.row(yi); + xel *b_row = b.row(yi); + xelval *a_alpha_row = a.alpha_row(yi); + xelval *b_alpha_row = b.alpha_row(yi); for (int xi = 0; xi < a.get_x_size(); ++xi) { - delta += abs(a.get_red_val(xi, yi) - b.get_red_val(xi, yi)); - delta += abs(a.get_green_val(xi, yi) - b.get_green_val(xi, yi)); - delta += abs(a.get_blue_val(xi, yi) - b.get_blue_val(xi, yi)); - delta += abs(a.get_alpha_val(xi, yi) - b.get_alpha_val(xi, yi)); + delta += abs(PPM_GETR(a_row[xi]) - PPM_GETR(b_row[xi])); + delta += abs(PPM_GETG(a_row[xi]) - PPM_GETG(b_row[xi])); + delta += abs(PPM_GETB(a_row[xi]) - PPM_GETB(b_row[xi])); + delta += abs(a_alpha_row[xi] - b_alpha_row[xi]); } } diff --git a/panda/src/pnmimage/pnmImage.h b/panda/src/pnmimage/pnmImage.h index c970411d27..802d539964 100644 --- a/panda/src/pnmimage/pnmImage.h +++ b/panda/src/pnmimage/pnmImage.h @@ -372,6 +372,7 @@ PUBLISHED: private: friend class Row; + friend class Texture; xel *_array; xelval *_alpha;