From 62d0d8292e21561caf6486dfdc3ec87204443f14 Mon Sep 17 00:00:00 2001 From: rdb Date: Fri, 9 Dec 2016 01:41:32 +0100 Subject: [PATCH] More texture load/store performance optimisations --- panda/src/gobj/texture.cxx | 53 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/panda/src/gobj/texture.cxx b/panda/src/gobj/texture.cxx index bcabaa7a8c..cd5cd54e73 100644 --- a/panda/src/gobj/texture.cxx +++ b/panda/src/gobj/texture.cxx @@ -6005,10 +6005,11 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, // Most common case: one byte per pixel, and the source image // maxval of 255. No scaling is necessary. Because this is such a common // case, we break it out per component for best performance. + const xel *array = pnmimage.get_array(); switch (num_components) { case 1: for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); + const xel *row = array + j * x_size; for (int i = 0; i < x_size; i++) { *p++ = (uchar)PPM_GETB(row[i]); } @@ -6018,9 +6019,10 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, case 2: if (img_has_alpha) { + const xelval *alpha = pnmimage.get_alpha_array(); for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); - xelval *alpha_row = pnmimage.alpha_row(j); + const xel *row = array + j * x_size; + const xelval *alpha_row = alpha + j * x_size; for (int i = 0; i < x_size; i++) { *p++ = (uchar)PPM_GETB(row[i]); *p++ = (uchar)alpha_row[i]; @@ -6029,7 +6031,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, } } else { for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); + const xel *row = array + j * x_size; for (int i = 0; i < x_size; i++) { *p++ = (uchar)PPM_GETB(row[i]); *p++ = (uchar)255; @@ -6041,7 +6043,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, case 3: for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); + const xel *row = array + j * x_size; for (int i = 0; i < x_size; i++) { *p++ = (uchar)PPM_GETB(row[i]); *p++ = (uchar)PPM_GETG(row[i]); @@ -6053,9 +6055,10 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, case 4: if (img_has_alpha) { + const xelval *alpha = pnmimage.get_alpha_array(); for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); - xelval *alpha_row = pnmimage.alpha_row(j); + const xel *row = array + j * x_size; + const xelval *alpha_row = alpha + j * x_size; for (int i = 0; i < x_size; i++) { *p++ = (uchar)PPM_GETB(row[i]); *p++ = (uchar)PPM_GETG(row[i]); @@ -6066,7 +6069,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, } } else { for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); + const xel *row = array + j * x_size; for (int i = 0; i < x_size; i++) { *p++ = (uchar)PPM_GETB(row[i]); *p++ = (uchar)PPM_GETG(row[i]); @@ -6089,7 +6092,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size, for (int j = y_size-1; j >= 0; j--) { for (int i = 0; i < x_size; i++) { if (is_grayscale) { - store_unscaled_short(p, pnmimage.get_gray_val(i, j)); + store_unscaled_short(p, pnmimage.get_gray_val(i, j)); } else { store_unscaled_short(p, pnmimage.get_blue_val(i, j)); store_unscaled_short(p, pnmimage.get_green_val(i, j)); @@ -6260,11 +6263,13 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size, const unsigned char *p = &image[idx]; if (component_width == 1) { + xel *array = pnmimage.get_array(); if (is_grayscale) { if (has_alpha) { + xelval *alpha = pnmimage.get_alpha_array(); for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); - xelval *alpha_row = pnmimage.alpha_row(j); + xel *row = array + j * x_size; + xelval *alpha_row = alpha + j * x_size; for (int i = 0; i < x_size; i++) { PPM_PUTB(row[i], *p++); alpha_row[i] = *p++; @@ -6272,7 +6277,7 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size, } } else { for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); + xel *row = array + j * x_size; for (int i = 0; i < x_size; i++) { PPM_PUTB(row[i], *p++); } @@ -6280,9 +6285,10 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size, } } else { if (has_alpha) { + xelval *alpha = pnmimage.get_alpha_array(); for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); - xelval *alpha_row = pnmimage.alpha_row(j); + xel *row = array + j * x_size; + xelval *alpha_row = alpha + j * x_size; for (int i = 0; i < x_size; i++) { PPM_PUTB(row[i], *p++); PPM_PUTG(row[i], *p++); @@ -6292,7 +6298,7 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size, } } else { for (int j = y_size-1; j >= 0; j--) { - xel *row = pnmimage.row(j); + xel *row = array + j * x_size; for (int i = 0; i < x_size; i++) { PPM_PUTB(row[i], *p++); PPM_PUTG(row[i], *p++); @@ -7028,13 +7034,20 @@ compare_images(const PNMImage &a, const PNMImage &b) { nassertr(a.get_x_size() == b.get_x_size() && a.get_y_size() == b.get_y_size(), false); + const xel *a_array = a.get_array(); + const xel *b_array = b.get_array(); + const xelval *a_alpha = a.get_alpha_array(); + const xelval *b_alpha = b.get_alpha_array(); + + int x_size = a.get_x_size(); + int delta = 0; for (int yi = 0; yi < a.get_y_size(); ++yi) { - xel *a_row = a.row(yi); - xel *b_row = b.row(yi); - xelval *a_alpha_row = a.alpha_row(yi); - xelval *b_alpha_row = b.alpha_row(yi); - for (int xi = 0; xi < a.get_x_size(); ++xi) { + const xel *a_row = a_array + yi * x_size; + const xel *b_row = b_array + yi * x_size; + const xelval *a_alpha_row = a_alpha + yi * x_size; + const xelval *b_alpha_row = b_alpha + yi * x_size; + for (int xi = 0; xi < x_size; ++xi) { delta += abs(PPM_GETR(a_row[xi]) - PPM_GETR(b_row[xi])); delta += abs(PPM_GETG(a_row[xi]) - PPM_GETG(b_row[xi])); delta += abs(PPM_GETB(a_row[xi]) - PPM_GETB(b_row[xi]));