From 04d4309b14515ee7f0bbd23360a61c1ee02a795a Mon Sep 17 00:00:00 2001 From: rdb Date: Sun, 1 Mar 2015 11:46:52 +0100 Subject: [PATCH] SSE2-enhanced sRGB encode (with runtime CPU detection) --- .../interfaceMakerPythonNative.cxx | 9 +- makepanda/makepanda.py | 12 +- panda/src/gobj/texture.cxx | 141 +++++++------- panda/src/gobj/texture.h | 7 + panda/src/pnmimage/convert_srgb.I | 175 ++++++++++++++++++ panda/src/pnmimage/convert_srgb.cxx | 165 +++++++++++++++++ panda/src/pnmimage/convert_srgb.h | 59 ++++++ panda/src/pnmimage/convert_srgb_sse2.cxx | 151 +++++++++++++++ panda/src/pnmimage/p3pnmimage_composite1.cxx | 1 + panda/src/tinydisplay/zbuffer.h | 2 +- 10 files changed, 645 insertions(+), 77 deletions(-) create mode 100644 panda/src/pnmimage/convert_srgb.I create mode 100644 panda/src/pnmimage/convert_srgb.cxx create mode 100644 panda/src/pnmimage/convert_srgb.h create mode 100644 panda/src/pnmimage/convert_srgb_sse2.cxx diff --git a/dtool/src/interrogate/interfaceMakerPythonNative.cxx b/dtool/src/interrogate/interfaceMakerPythonNative.cxx index 5bb52227ca..db63d2bc9d 100644 --- a/dtool/src/interrogate/interfaceMakerPythonNative.cxx +++ b/dtool/src/interrogate/interfaceMakerPythonNative.cxx @@ -3204,7 +3204,8 @@ write_function_instance(ostream &out, InterfaceMaker::Object *obj, } extra_convert += "PyObject *" + param_name + "_long = PyNumber_Long(" + param_name + ");"; extra_param_check += " && " + param_name + "_long != NULL"; - pexpr_string = "PyLong_AsUnsignedLongLong(" + param_name + "_long)"; + pexpr_string = "(" + type->get_local_name(&parser) + ")" + + "PyLong_AsUnsignedLongLong(" + param_name + "_long)"; extra_cleanup += "Py_XDECREF(" + param_name + "_long);"; expected_params += "unsigned long long"; ++num_params; @@ -3219,7 +3220,8 @@ write_function_instance(ostream &out, InterfaceMaker::Object *obj, } extra_convert += "PyObject *" + param_name + "_long = PyNumber_Long(" + param_name + ");"; extra_param_check += " && " + param_name + "_long != NULL"; - 
pexpr_string = "PyLong_AsLongLong(" + param_name + "_long)"; + pexpr_string = "(" + type->get_local_name(&parser) + ")" + + "PyLong_AsLongLong(" + param_name + "_long)"; extra_cleanup += "Py_XDECREF(" + param_name + "_long);"; expected_params += "long long"; ++num_params; @@ -3234,7 +3236,8 @@ write_function_instance(ostream &out, InterfaceMaker::Object *obj, } extra_convert += "PyObject *" + param_name + "_long = PyNumber_Long(" + param_name + ");"; extra_param_check += " && " + param_name + "_long != NULL"; - pexpr_string = "PyLong_AsUnsignedLong(" + param_name + "_long)"; + pexpr_string = "(" + type->get_local_name(&parser) + ")" + + "PyLong_AsUnsignedLong(" + param_name + "_long)"; extra_cleanup += "Py_XDECREF(" + param_name + "_long);"; expected_params += "unsigned int"; ++num_params; diff --git a/makepanda/makepanda.py b/makepanda/makepanda.py index 72d30a5dab..0be433b6d7 100755 --- a/makepanda/makepanda.py +++ b/makepanda/makepanda.py @@ -949,7 +949,7 @@ def CompileCxx(obj,src,opts): if PkgSkip("TOUCHINPUT") == 0: cmd += "/DWINVER=0x601 " cmd += "/Fo" + obj + " /nologo /c" - if (GetTargetArch() != 'x64' and PkgSkip("SSE2") == 0): + if GetTargetArch() != 'x64' and (not PkgSkip("SSE2") or 'SSE2' in opts): cmd += " /arch:SSE2" for x in ipath: cmd += " /I" + x for (opt,dir) in INCDIRECTORIES: @@ -1160,7 +1160,7 @@ def CompileCxx(obj,src,opts): if optlevel >= 4 or GetTarget() == "android": cmd += " -fno-rtti" - if PkgSkip("SSE2") == 0 and not arch.startswith("arm"): + if ('SSE2' in opts or not PkgSkip("SSE2")) and not arch.startswith("arm"): cmd += " -msse2" if optlevel >= 3: @@ -1705,7 +1705,7 @@ def RunGenPyCode(target, inputs, opts): if (PkgSkip("PYTHON") != 0): return - cmdstr = sys.executable + " " + cmdstr = BracketNameWithQuotes(SDK["PYTHONEXEC"]) + " " if sys.version_info >= (2, 6): cmdstr += "-B " @@ -1729,7 +1729,7 @@ def RunGenPyCode(target, inputs, opts): def FreezePy(target, inputs, opts): assert len(inputs) > 0 # Make sure this function isn't called 
before genpycode is run. - cmdstr = sys.executable + " " + cmdstr = BracketNameWithQuotes(SDK["PYTHONEXEC"]) + " " if sys.version_info >= (2, 6): cmdstr += "-B " @@ -1757,7 +1757,7 @@ def FreezePy(target, inputs, opts): def Package(target, inputs, opts): assert len(inputs) == 1 # Invoke the ppackage script. - command = sys.executable + " " + command = BracketNameWithQuotes(SDK["PYTHONEXEC"]) + " " if GetOptimizeOption(opts) >= 4: command += "-OO " @@ -3192,6 +3192,7 @@ if (not RUNTIME): OPTS=['DIR:panda/src/pnmimage', 'BUILDING:PANDA', 'ZLIB'] TargetAdd('p3pnmimage_composite1.obj', opts=OPTS, input='p3pnmimage_composite1.cxx') TargetAdd('p3pnmimage_composite2.obj', opts=OPTS, input='p3pnmimage_composite2.cxx') + TargetAdd('p3pnmimage_convert_srgb_sse2.obj', opts=OPTS+['SSE2'], input='convert_srgb_sse2.cxx') OPTS=['DIR:panda/src/pnmimage', 'ZLIB'] IGATEFILES=GetDirectoryContents('panda/src/pnmimage', ["*.h", "*_composite*.cxx"]) @@ -3621,6 +3622,7 @@ if (not RUNTIME): TargetAdd('libpanda.dll', input='p3pnmimagetypes_composite2.obj') TargetAdd('libpanda.dll', input='p3pnmimage_composite1.obj') TargetAdd('libpanda.dll', input='p3pnmimage_composite2.obj') + TargetAdd('libpanda.dll', input='p3pnmimage_convert_srgb_sse2.obj') TargetAdd('libpanda.dll', input='p3text_composite1.obj') TargetAdd('libpanda.dll', input='p3text_composite2.obj') TargetAdd('libpanda.dll', input='p3tform_composite1.obj') diff --git a/panda/src/gobj/texture.cxx b/panda/src/gobj/texture.cxx index d44903e921..08474cc445 100644 --- a/panda/src/gobj/texture.cxx +++ b/panda/src/gobj/texture.cxx @@ -41,6 +41,7 @@ #include "pbitops.h" #include "streamReader.h" #include "texturePeeker.h" +#include "convert_srgb.h" #ifdef HAVE_SQUISH #include @@ -131,46 +132,6 @@ struct DDSHeader { DDSCaps2 caps; }; -// This table is used for converting unsigned char texture values in an sRGB -// texture to linear RGB values, for use in mipmap generation. 
-static float srgb_to_lrgbf[256] = {0.000000f, 0.000304f, 0.000607f, 0.000911f, - 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f, - 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, - 0.006049f, 0.006512f, 0.006995f, 0.007499f, 0.008023f, 0.008568f, 0.009134f, - 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, - 0.014444f, 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, - 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f, 0.025187f, 0.026241f, - 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, - 0.035601f, 0.036889f, 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, - 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f, 0.054480f, - 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, - 0.068478f, 0.070360f, 0.072272f, 0.074214f, 0.076185f, 0.078187f, 0.080220f, - 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f, - 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, - 0.114435f, 0.116971f, 0.119538f, 0.122139f, 0.124772f, 0.127438f, 0.130136f, - 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, - 0.152926f, 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, - 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f, 0.191202f, 0.194618f, - 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, - 0.223228f, 0.226966f, 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, - 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f, 0.274677f, - 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, - 0.309469f, 0.313989f, 0.318547f, 0.323143f, 0.327778f, 0.332452f, 0.337164f, - 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f, - 0.376262f, 0.381326f, 0.386429f, 0.391572f, 0.396755f, 0.401978f, 0.407240f, - 0.412543f, 0.417885f, 
0.423268f, 0.428690f, 0.434154f, 0.439657f, 0.445201f, - 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473531f, 0.479320f, 0.485150f, - 0.491021f, 0.496933f, 0.502886f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, - 0.533276f, 0.539479f, 0.545724f, 0.552011f, 0.558340f, 0.564712f, 0.571125f, - 0.577580f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, - 0.623960f, 0.630757f, 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, - 0.672443f, 0.679542f, 0.686685f, 0.693872f, 0.701102f, 0.708376f, 0.715694f, - 0.723055f, 0.730461f, 0.737910f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, - 0.775822f, 0.783538f, 0.791298f, 0.799103f, 0.806952f, 0.814847f, 0.822786f, - 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f, - 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, - 0.947307f, 0.955973f, 0.964686f, 0.973445f, 0.982251f, 0.991102f, 1.000000f}; - //////////////////////////////////////////////////////////////////// // Function: Texture::Constructor // Access: Published @@ -6990,7 +6951,13 @@ do_filter_2d_mipmap_pages(const CData *cdata, // We currently only support sRGB mipmap generation for // unsigned byte textures, due to our use of a lookup table. nassertv(cdata->_component_type == T_unsigned_byte); - filter_component = &filter_2d_unsigned_byte_srgb; + + if (has_sse2_sRGB_encode()) { + filter_component = &filter_2d_unsigned_byte_srgb_sse2; + } else { + filter_component = &filter_2d_unsigned_byte_srgb; + } + // Alpha is always linear. filter_alpha = &filter_2d_unsigned_byte; @@ -7140,7 +7107,13 @@ do_filter_3d_mipmap_level(const CData *cdata, // We currently only support sRGB mipmap generation for // unsigned byte textures, due to our use of a lookup table. 
nassertv(cdata->_component_type == T_unsigned_byte); - filter_component = &filter_3d_unsigned_byte_srgb; + + if (has_sse2_sRGB_encode()) { + filter_component = &filter_3d_unsigned_byte_srgb_sse2; + } else { + filter_component = &filter_3d_unsigned_byte_srgb; + } + // Alpha is always linear. filter_alpha = &filter_3d_unsigned_byte; @@ -7385,18 +7358,32 @@ filter_2d_unsigned_byte(unsigned char *&p, const unsigned char *&q, void Texture:: filter_2d_unsigned_byte_srgb(unsigned char *&p, const unsigned char *&q, size_t pixel_size, size_t row_size) { - float result = (srgb_to_lrgbf[q[0]] + - srgb_to_lrgbf[q[pixel_size]] + - srgb_to_lrgbf[q[row_size]] + - srgb_to_lrgbf[q[pixel_size + row_size]]) / 4.0f; + float result = (decode_sRGB_float(q[0]) + + decode_sRGB_float(q[pixel_size]) + + decode_sRGB_float(q[row_size]) + + decode_sRGB_float(q[pixel_size + row_size])); - // This is based on the formula out of the EXT_texture_sRGB - // specification, except the factors are multiplied with 255.0f. - if (result < 0.0031308f) { - *p = (unsigned char)(result * 3294.6f); - } else { - *p = (unsigned char)(269.025f * powf(result, 0.41666f) - 14.025f); - } + *p = encode_sRGB_uchar(result * 0.25f); + ++p; + ++q; +} + +//////////////////////////////////////////////////////////////////// +// Function: Texture::filter_2d_unsigned_byte_srgb_sse2 +// Access: Public, Static +// Description: Averages a 2x2 block of pixel components into a +// single pixel component, for producing the next mipmap +// level. Increments p and q to the next component. 
+//////////////////////////////////////////////////////////////////// +void Texture:: +filter_2d_unsigned_byte_srgb_sse2(unsigned char *&p, const unsigned char *&q, + size_t pixel_size, size_t row_size) { + float result = (decode_sRGB_float(q[0]) + + decode_sRGB_float(q[pixel_size]) + + decode_sRGB_float(q[row_size]) + + decode_sRGB_float(q[pixel_size + row_size])); + + *p = encode_sRGB_uchar_sse2(result * 0.25f); ++p; ++q; } @@ -7470,22 +7457,40 @@ filter_3d_unsigned_byte(unsigned char *&p, const unsigned char *&q, void Texture:: filter_3d_unsigned_byte_srgb(unsigned char *&p, const unsigned char *&q, size_t pixel_size, size_t row_size, size_t page_size) { - float result = (srgb_to_lrgbf[q[0]] + - srgb_to_lrgbf[q[pixel_size]] + - srgb_to_lrgbf[q[row_size]] + - srgb_to_lrgbf[q[pixel_size + row_size]] + - srgb_to_lrgbf[q[page_size]] + - srgb_to_lrgbf[q[pixel_size + page_size]] + - srgb_to_lrgbf[q[row_size + page_size]] + - srgb_to_lrgbf[q[pixel_size + row_size + page_size]]) / 8.0f; + float result = (decode_sRGB_float(q[0]) + + decode_sRGB_float(q[pixel_size]) + + decode_sRGB_float(q[row_size]) + + decode_sRGB_float(q[pixel_size + row_size]) + + decode_sRGB_float(q[page_size]) + + decode_sRGB_float(q[pixel_size + page_size]) + + decode_sRGB_float(q[row_size + page_size]) + + decode_sRGB_float(q[pixel_size + row_size + page_size])); - // This is based on the formula out of the EXT_texture_sRGB - // specification, except the factors are multiplied with 255.0f. - if (result < 0.0031308f) { - *p = (unsigned char)(result * 3294.6f); - } else { - *p = (unsigned char)(269.025f * powf(result, 0.41666f) - 14.025f); - } + *p = encode_sRGB_uchar(result * 0.125f); + ++p; + ++q; +} + +//////////////////////////////////////////////////////////////////// +// Function: Texture::filter_3d_unsigned_byte_srgb_sse2 +// Access: Public, Static +// Description: Averages a 2x2x2 block of pixel components into a +// single pixel component, for producing the next mipmap +// level. 
Increments p and q to the next component. +//////////////////////////////////////////////////////////////////// +void Texture:: +filter_3d_unsigned_byte_srgb_sse2(unsigned char *&p, const unsigned char *&q, + size_t pixel_size, size_t row_size, size_t page_size) { + float result = (decode_sRGB_float(q[0]) + + decode_sRGB_float(q[pixel_size]) + + decode_sRGB_float(q[row_size]) + + decode_sRGB_float(q[pixel_size + row_size]) + + decode_sRGB_float(q[page_size]) + + decode_sRGB_float(q[pixel_size + page_size]) + + decode_sRGB_float(q[row_size + page_size]) + + decode_sRGB_float(q[pixel_size + row_size + page_size])); + + *p = encode_sRGB_uchar_sse2(result * 0.125f); ++p; ++q; } diff --git a/panda/src/gobj/texture.h b/panda/src/gobj/texture.h index c8e0571aea..66e52ed7da 100644 --- a/panda/src/gobj/texture.h +++ b/panda/src/gobj/texture.h @@ -749,6 +749,9 @@ private: static void filter_2d_unsigned_byte_srgb(unsigned char *&p, const unsigned char *&q, size_t pixel_size, size_t row_size); + static void filter_2d_unsigned_byte_srgb_sse2(unsigned char *&p, + const unsigned char *&q, + size_t pixel_size, size_t row_size); static void filter_2d_unsigned_short(unsigned char *&p, const unsigned char *&q, size_t pixel_size, size_t row_size); @@ -763,6 +766,10 @@ private: const unsigned char *&q, size_t pixel_size, size_t row_size, size_t page_size); + static void filter_3d_unsigned_byte_srgb_sse2(unsigned char *&p, + const unsigned char *&q, + size_t pixel_size, size_t row_size, + size_t page_size); static void filter_3d_unsigned_short(unsigned char *&p, const unsigned char *&q, size_t pixel_size, size_t row_size, diff --git a/panda/src/pnmimage/convert_srgb.I b/panda/src/pnmimage/convert_srgb.I new file mode 100644 index 0000000000..e841748fb4 --- /dev/null +++ b/panda/src/pnmimage/convert_srgb.I @@ -0,0 +1,175 @@ +// Filename: convert_srgb.I +// Created by: rdb (29Oct14) +// +//////////////////////////////////////////////////////////////////// +// +// PANDA 3D SOFTWARE +// 
Copyright (c) Carnegie Mellon University. All rights reserved. +// +// All use of this software is subject to the terms of the revised BSD +// license. You should have received a copy of this license along +// with this source code in a file named "LICENSE." +// +//////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////// +// Function: decode_sRGB_float +// Description: Decodes the sRGB-encoded unsigned char value to +// a linearized float in the range 0-1. +//////////////////////////////////////////////////////////////////// +CONSTEXPR float decode_sRGB_float(unsigned char val) { + return to_linear_float_table[val]; +} + +//////////////////////////////////////////////////////////////////// +// Function: decode_sRGB_float +// Description: Decodes the sRGB-encoded floating-point value in +// the range 0-1 to a linearized float in the range +// 0-1. Inputs outside this range produce invalid +// results. +//////////////////////////////////////////////////////////////////// +INLINE float decode_sRGB_float(float val) { + return (val <= 0.04045f) + ? (val * (1.f / 12.92f)) + : cpow((val + 0.055f) * (1.f / 1.055f), 2.4f); +} + +//////////////////////////////////////////////////////////////////// +// Function: decode_sRGB_uchar +// Description: Decodes the sRGB-encoded unsigned char value to +// a linearized unsigned char value. +//////////////////////////////////////////////////////////////////// +CONSTEXPR unsigned char decode_sRGB_uchar(unsigned char val) { + return to_linear_uchar_table[val]; +} + +//////////////////////////////////////////////////////////////////// +// Function: decode_sRGB_uchar +// Description: Decodes the sRGB-encoded floating-point value in +// the range 0-1 to a linearized unsigned char value. +// Inputs outside this range are clamped.
+//////////////////////////////////////////////////////////////////// +INLINE unsigned char decode_sRGB_uchar(float val) { + return (val <= 0.04045f) + ? (unsigned char)(max(0.f, val) * (255.f / 12.92f) + 0.5f) + : (unsigned char)(cpow((min(val, 1.f) + 0.055f) * (1.f / 1.055f), 2.4f) * 255.f + 0.5f); +} + +//////////////////////////////////////////////////////////////////// +// Function: encode_sRGB_float +// Description: Encodes the linearized unsigned char value to an +// sRGB-encoded floating-point value in the range 0-1. +//////////////////////////////////////////////////////////////////// +INLINE float +encode_sRGB_float(unsigned char val) { + // This seems like a very unlikely use case, so I didn't bother + // making a look-up table for this. + return (val == 0) ? 0 + : (1.055f * cpow((float)val * (1.f / 255.f), 0.41666f) - 0.055); +} + +//////////////////////////////////////////////////////////////////// +// Function: encode_sRGB_float +// Description: Encodes the linearized floating-point value in the +// range 0-1 to an sRGB-encoded float in the range +// 0-1. Inputs outside this range produce invalid +// results. +//////////////////////////////////////////////////////////////////// +INLINE float +encode_sRGB_float(float val) { + return (val < 0.0031308f) + ? (val * 12.92f) + : (1.055f * cpow(val, 0.41666f) - 0.055); +} + +//////////////////////////////////////////////////////////////////// +// Function: encode_sRGB_uchar +// Description: Encodes the linearized unsigned char value to an +// sRGB-encoded unsigned char value. +//////////////////////////////////////////////////////////////////// +CONSTEXPR unsigned char +encode_sRGB_uchar(unsigned char val) { + return to_srgb8_table[val]; +} + +//////////////////////////////////////////////////////////////////// +// Function: encode_sRGB_uchar +// Description: Encodes the linearized floating-point value in the +// range 0-1 to an sRGB-encoded unsigned char value. +// Inputs outside this range are clamped.
+// +// When SSE2 support is known at compile time, this +// automatically uses an optimized version. Otherwise, +// it does not attempt runtime CPU detection. If you +// know that SSE2 is supported (i.e. if the function +// has_sse2_sRGB_encode() returns true) you should +// call encode_sRGB_uchar_sse2 instead. +//////////////////////////////////////////////////////////////////// +INLINE unsigned char +encode_sRGB_uchar(float val) { +#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) + // Use a highly optimized approximation that has more than enough + // accuracy for an unsigned char. + return encode_sRGB_uchar_sse2(val); +#else + return (val < 0.0031308f) + ? (unsigned char) (max(0.f, val) * 3294.6f + 0.5f) + : (unsigned char) (269.025f * cpow(min(val, 1.f), 0.41666f) - 13.525f); +#endif +} + +//////////////////////////////////////////////////////////////////// +// Function: encode_sRGB_uchar +// Description: Encodes the linearized floating-point color value +// to an sRGB-encoded xel in the range 0-255. +// +// When SSE2 support is known at compile time, this +// automatically uses an optimized version. Otherwise, +// it does not attempt runtime CPU detection. If you +// know that SSE2 is supported (i.e. if the function +// has_sse2_sRGB_encode() returns true) you should +// call encode_sRGB_uchar_sse2 instead. +//////////////////////////////////////////////////////////////////// +INLINE void +encode_sRGB_uchar(const LColorf &color, xel &into) { +#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) + // SSE2 support compiled-in; we're guaranteed to have it. + encode_sRGB_uchar_sse2(color, into); +#else + // Boring, slow, non-SSE2 version.
+ PPM_ASSIGN(into, + encode_sRGB_uchar(color[0]), + encode_sRGB_uchar(color[1]), + encode_sRGB_uchar(color[2])); +#endif +} + +//////////////////////////////////////////////////////////////////// +// Function: encode_sRGB_uchar +// Description: Encodes the linearized floating-point color value +// to an sRGB-encoded xel and alpha in the range 0-255. +// The alpha value is not sRGB-encoded. +// +// When SSE2 support is known at compile time, this +// automatically uses an optimized version. Otherwise, +// it does not attempt runtime CPU detection. If you +// know that SSE2 is supported (i.e. if the function +// has_sse2_sRGB_encode() returns true) you should +// call encode_sRGB_uchar_sse2 instead. +//////////////////////////////////////////////////////////////////// +INLINE void +encode_sRGB_uchar(const LColorf &color, xel &into, xelval &into_alpha) { +#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) + // SSE2 support compiled-in; we're guaranteed to have it. + encode_sRGB_uchar_sse2(color, into, into_alpha); +#else + // Boring, slow, non-SSE2 version. + PPM_ASSIGN(into, + encode_sRGB_uchar(color[0]), + encode_sRGB_uchar(color[1]), + encode_sRGB_uchar(color[2])); + + into_alpha = (xelval) (color[3] * 255.f + 0.5f); +#endif +} diff --git a/panda/src/pnmimage/convert_srgb.cxx b/panda/src/pnmimage/convert_srgb.cxx new file mode 100644 index 0000000000..ef27735d06 --- /dev/null +++ b/panda/src/pnmimage/convert_srgb.cxx @@ -0,0 +1,165 @@ +// Filename: convert_srgb.cxx +// Created by: rdb (13Nov14) +// +//////////////////////////////////////////////////////////////////// +// +// PANDA 3D SOFTWARE +// Copyright (c) Carnegie Mellon University. All rights reserved. +// +// All use of this software is subject to the terms of the revised BSD +// license. You should have received a copy of this license along +// with this source code in a file named "LICENSE."
+// +//////////////////////////////////////////////////////////////////// + +#include "convert_srgb.h" + +#ifdef __GNUC__ +#include +#endif + +#ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include +#endif + +// Lookup tables for converting from unsigned char formats. +ALIGN_64BYTE const +unsigned char to_srgb8_table[256] = { 0x00, 0x0d, 0x16, 0x1c, 0x22, 0x26, 0x2a, + 0x2e, 0x32, 0x35, 0x38, 0x3b, 0x3d, 0x40, 0x42, 0x45, 0x47, 0x49, 0x4b, 0x4d, + 0x4f, 0x51, 0x53, 0x55, 0x56, 0x58, 0x5a, 0x5c, 0x5d, 0x5f, 0x60, 0x62, 0x63, + 0x65, 0x66, 0x68, 0x69, 0x6a, 0x6c, 0x6d, 0x6e, 0x70, 0x71, 0x72, 0x73, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, + 0x91, 0x92, 0x93, 0x94, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, + 0x9c, 0x9d, 0x9e, 0x9f, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa3, 0xa4, 0xa5, 0xa6, + 0xa7, 0xa7, 0xa8, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xad, 0xad, 0xae, 0xaf, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb2, 0xb3, 0xb4, 0xb4, 0xb5, 0xb6, 0xb6, 0xb7, 0xb8, 0xb9, + 0xb9, 0xba, 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf, 0xc0, 0xc0, 0xc1, + 0xc2, 0xc2, 0xc3, 0xc4, 0xc4, 0xc5, 0xc5, 0xc6, 0xc7, 0xc7, 0xc8, 0xc8, 0xc9, + 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xcd, 0xce, 0xce, 0xcf, 0xd0, 0xd0, 0xd1, + 0xd1, 0xd2, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5, 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, + 0xd8, 0xd9, 0xda, 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde, 0xde, 0xdf, + 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe2, 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, + 0xe6, 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xe9, 0xea, 0xea, 0xeb, 0xeb, 0xec, 0xec, + 0xed, 0xed, 0xee, 0xee, 0xee, 0xef, 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, + 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, + 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, + 0xff, 0xff}; + +ALIGN_64BYTE const +unsigned char 
to_linear_uchar_table[256] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, + 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x0a, 0x0b, + 0x0b, 0x0c, 0x0c, 0x0c, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, 0x10, + 0x11, 0x11, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14, 0x14, 0x15, 0x16, 0x16, 0x17, + 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1e, + 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23, 0x23, 0x24, 0x25, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4c, 0x4d, + 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x5a, 0x5b, 0x5c, + 0x5d, 0x5f, 0x60, 0x61, 0x63, 0x64, 0x65, 0x67, 0x68, 0x69, 0x6b, 0x6c, 0x6d, + 0x6f, 0x70, 0x72, 0x73, 0x74, 0x76, 0x77, 0x79, 0x7a, 0x7c, 0x7d, 0x7f, 0x80, + 0x82, 0x83, 0x85, 0x86, 0x88, 0x8a, 0x8b, 0x8d, 0x8e, 0x90, 0x92, 0x93, 0x95, + 0x97, 0x98, 0x9a, 0x9c, 0x9d, 0x9f, 0xa1, 0xa3, 0xa4, 0xa6, 0xa8, 0xaa, 0xab, + 0xad, 0xaf, 0xb1, 0xb3, 0xb5, 0xb7, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, + 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe5, 0xe7, 0xe9, 0xeb, 0xed, 0xef, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, + 0xfd, 0xff}; + +ALIGN_64BYTE +const float to_linear_float_table[256] = { 0, 0.000304f, 0.000607f, 0.000911f, + 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f, + 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, + 0.006049f, 0.006512f, 0.006995f, 0.007499f, 0.008023f, 0.008568f, 0.009134f, + 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 
0.012983f, 0.013702f, + 0.014444f, 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, + 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f, 0.025187f, 0.026241f, + 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, + 0.035601f, 0.036889f, 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, + 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f, 0.054480f, + 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, + 0.068478f, 0.070360f, 0.072272f, 0.074214f, 0.076185f, 0.078187f, 0.080220f, + 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f, + 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, + 0.114435f, 0.116971f, 0.119538f, 0.122139f, 0.124772f, 0.127438f, 0.130136f, + 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, + 0.152926f, 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, + 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f, 0.191202f, 0.194618f, + 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, + 0.223228f, 0.226966f, 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, + 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f, 0.274677f, + 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, + 0.309469f, 0.313989f, 0.318547f, 0.323143f, 0.327778f, 0.332452f, 0.337164f, + 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f, + 0.376262f, 0.381326f, 0.386429f, 0.391572f, 0.396755f, 0.401978f, 0.407240f, + 0.412543f, 0.417885f, 0.423268f, 0.428690f, 0.434154f, 0.439657f, 0.445201f, + 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473531f, 0.479320f, 0.485150f, + 0.491021f, 0.496933f, 0.502886f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, + 0.533276f, 0.539479f, 0.545724f, 0.552011f, 0.558340f, 0.564712f, 0.571125f, + 0.577580f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, + 
0.623960f, 0.630757f, 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, + 0.672443f, 0.679542f, 0.686685f, 0.693872f, 0.701102f, 0.708376f, 0.715694f, + 0.723055f, 0.730461f, 0.737910f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, + 0.775822f, 0.783538f, 0.791298f, 0.799103f, 0.806952f, 0.814847f, 0.822786f, + 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f, + 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, + 0.947307f, 0.955973f, 0.964686f, 0.973445f, 0.982251f, 0.991102f, 1.000000f}; + + +#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) +// SSE2 support enabled at compile time. No runtime detection mechanism needed. +bool +has_sse2_sRGB_encode() { + return true; +} + +#else +// SSE2 support not guaranteed. Use a runtime detection mechanism. + +bool +has_sse2_sRGB_encode() { +#if defined(__GNUC__) + unsigned int a, b, c, d; + static const bool has_support = + (__get_cpuid(1, &a, &b, &c, &d) == 1 && (d & 0x04000000) != 0); + +#elif defined(_WIN32) + static const bool has_support = + (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) != FALSE); + +#else + static const bool has_support = false; +#endif + + if (pnmimage_cat.is_debug()) { + static bool checked = false; + if (!checked) { +#if defined(__GNUC__) || defined(_WIN32) + if (has_support) { + pnmimage_cat.debug() + << "Runtime detection reports SSE2 instructions available: " + << "SSE2-optimized sRGB encoding routines enabled.\n"; + } else { + pnmimage_cat.debug() + << "Runtime detection reports SSE2 instructions unavailable: " + << "SSE2-optimized sRGB encoding routines disabled.\n"; + } +#else + pnmimage_cat.debug() + << "No runtime detection mechanism for SSE2 instructions available: " + << "SSE2-optimized sRGB encoding routines disabled.\n"; +#endif + checked = true; + } + } + + return has_support; +} + +#endif // __SSE2__ diff --git a/panda/src/pnmimage/convert_srgb.h b/panda/src/pnmimage/convert_srgb.h 
new file mode 100644 index 0000000000..6d59019d84 --- /dev/null +++ b/panda/src/pnmimage/convert_srgb.h @@ -0,0 +1,59 @@ +// Filename: convert_srgb.h +// Created by: rdb (13Nov14) +// +//////////////////////////////////////////////////////////////////// +// +// PANDA 3D SOFTWARE +// Copyright (c) Carnegie Mellon University. All rights reserved. +// +// All use of this software is subject to the terms of the revised BSD +// license. You should have received a copy of this license along +// with this source code in a file named "LICENSE." +// +//////////////////////////////////////////////////////////////////// + +#ifndef CONVERT_SRGB_H +#define CONVERT_SRGB_H + +#include "pandabase.h" +#include "luse.h" +#include "pnmimage_base.h" + +// The below functions can encode and decode sRGB colors in various +// representations. Some of them are implemented using look-up tables, +// some others using SSE2 intrinsics. +extern EXPCL_PANDA_PNMIMAGE const unsigned char to_srgb8_table[256]; +extern EXPCL_PANDA_PNMIMAGE const unsigned char to_linear_uchar_table[256]; +extern EXPCL_PANDA_PNMIMAGE const float to_linear_float_table[256]; + +EXPCL_PANDA_PNMIMAGE CONSTEXPR float decode_sRGB_float(unsigned char val); +EXPCL_PANDA_PNMIMAGE INLINE float decode_sRGB_float(float val); +EXPCL_PANDA_PNMIMAGE CONSTEXPR unsigned char decode_sRGB_uchar(unsigned char val); +EXPCL_PANDA_PNMIMAGE INLINE unsigned char decode_sRGB_uchar(float val); + +EXPCL_PANDA_PNMIMAGE INLINE float encode_sRGB_float(unsigned char val); +EXPCL_PANDA_PNMIMAGE INLINE float encode_sRGB_float(float val); +EXPCL_PANDA_PNMIMAGE CONSTEXPR unsigned char encode_sRGB_uchar(unsigned char val); +EXPCL_PANDA_PNMIMAGE INLINE unsigned char encode_sRGB_uchar(float val); + +// These functions convert more than one component in one go, +// which can be faster due to vectorization. 
+EXPCL_PANDA_PNMIMAGE INLINE void encode_sRGB_uchar(const LColorf &from, + xel &into); +EXPCL_PANDA_PNMIMAGE INLINE void encode_sRGB_uchar(const LColorf &from, + xel &into, xelval &into_alpha); + +// Use these functions if you know that SSE2 support is available. +// Otherwise, they will crash! +EXPCL_PANDA_PNMIMAGE unsigned char encode_sRGB_uchar_sse2(float val); +EXPCL_PANDA_PNMIMAGE void encode_sRGB_uchar_sse2(const LColorf &from, + xel &into); +EXPCL_PANDA_PNMIMAGE void encode_sRGB_uchar_sse2(const LColorf &from, + xel &into, xelval &into_alpha); + +// Use the following to find out if you can call either of the above. +EXPCL_PANDA_PNMIMAGE bool has_sse2_sRGB_encode(); + +#include "convert_srgb.I" + +#endif diff --git a/panda/src/pnmimage/convert_srgb_sse2.cxx b/panda/src/pnmimage/convert_srgb_sse2.cxx new file mode 100644 index 0000000000..e83a00394d --- /dev/null +++ b/panda/src/pnmimage/convert_srgb_sse2.cxx @@ -0,0 +1,151 @@ +// Filename: convert_srgb_sse2.cxx +// Created by: rdb (13Nov14) +// +//////////////////////////////////////////////////////////////////// +// +// PANDA 3D SOFTWARE +// Copyright (c) Carnegie Mellon University. All rights reserved. +// +// All use of this software is subject to the terms of the revised BSD +// license. You should have received a copy of this license along +// with this source code in a file named "LICENSE." +// +//////////////////////////////////////////////////////////////////// + +// This file should always be compiled with SSE2 support. These +// functions will only be called when SSE2 support is detected at +// run-time. + +#include "convert_srgb.h" +#include "luse.h" + +#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) + +#include <xmmintrin.h> +#include <emmintrin.h> + +static INLINE __m128i _encode_sRGB_sse2_mul255(__m128 val) { + // This is an SSE2-based approximation of the sRGB encode function. + // It has a maximum error of around 0.001, which is by far small + // enough for a uchar. 
It is also at least 10x as fast as the + // original; up to 40x when taking advantage of vectorization. + // Note that the fourth float is only multiplied with 255. + + // Part of the code in this function is derived from: + // http://stackoverflow.com/a/6486630/2135754 + + // Clamp to 0-1 range. + val = _mm_max_ps(val, _mm_set1_ps(0.0f)); + val = _mm_min_ps(val, _mm_set1_ps(1.0f)); + + // Pre-multiply with constant factor to adjust for exp bias. + __m128 xf = _mm_mul_ps(val, _mm_set1_ps(6.3307e18f)); + + // Approximate logarithm by... casting! + xf = _mm_cvtepi32_ps(_mm_castps_si128(xf)); + + // Multiply 'logarithm' by power. + xf = _mm_mul_ps(xf, _mm_set1_ps(2.0f / 3.0f)); + + // Reverse operation of above: cast the other way. + xf = _mm_castsi128_ps(_mm_cvtps_epi32(xf)); + + // Make an overestimate and an underestimate. + __m128 xover = _mm_mul_ps(val, xf); + __m128 xunder = _mm_mul_ps(_mm_mul_ps(val, val), + _mm_rsqrt_ps(xf)); + + // Average the two factors, with a slight bias. + __m128 xavg = _mm_mul_ps(_mm_add_ps(xover, xunder), + _mm_set1_ps(0.5286098f)); + + // Take square root twice. Note that this is faster than + // the more expensive _mm_sqrt_ps instruction. + xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg)); + xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg)); + + // Bring it into the correct range. These factors are determined + // not on the basis of accuracy, but are chosen such that the + // decoder lookup table produces an equivalent result for any value. + xavg = _mm_mul_ps(xavg, _mm_set1_ps(269.122f)); + xavg = _mm_sub_ps(xavg, _mm_set1_ps(13.55f)); + + // Compute the linear section. This is also the path that + // the alpha channel takes, so we set the alpha multiplier + // to 255 (since alpha is not sRGB-converted). + __m128 lval = _mm_mul_ps(val, + _mm_set_ps(255.0f, 3294.6f, 3294.6f, 3294.6f)); + + lval = _mm_add_ps(lval, _mm_set1_ps(0.5f)); + + // Decide which version to return. 
Rig the alpha + // comparator to always fail so that the linear path + // is always chosen for alpha. + __m128 mask = _mm_cmpge_ps(val, + _mm_set_ps(2.0f, 0.0031308f, 0.0031308f, 0.0031308f)); + + // This is a non-branching way to return one or the other value. + return _mm_cvttps_epi32(_mm_or_ps( + _mm_and_ps(mask, xavg), + _mm_andnot_ps(mask, lval))); +} + +unsigned char +encode_sRGB_uchar_sse2(float val) { + // Running only a single component through this function is still + // way faster than the equivalent non-SSE2 version. + return (unsigned char) + _mm_extract_epi16(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0); // pextrw is SSE2; _mm_extract_epi32 would require SSE4.1 +} + +void +encode_sRGB_uchar_sse2(const LColorf &color, xel &into) { +#ifdef LINMATH_ALIGN + __m128 vec = _mm_load_ps(color.get_data()); +#else + __m128 vec = _mm_loadu_ps(color.get_data()); +#endif + + __m128i vals = _encode_sRGB_sse2_mul255(vec); // every 32-bit lane now holds a value in 0..255 + into.r = _mm_extract_epi16(vals, 0); // low 16-bit word of 32-bit lane 0 + into.g = _mm_extract_epi16(vals, 2); // low 16-bit word of 32-bit lane 1 + into.b = _mm_extract_epi16(vals, 4); // low 16-bit word of 32-bit lane 2 +} + +void +encode_sRGB_uchar_sse2(const LColorf &color, xel &into, xelval &into_alpha) { +#ifdef LINMATH_ALIGN + __m128 vec = _mm_load_ps(color.get_data()); +#else + __m128 vec = _mm_loadu_ps(color.get_data()); +#endif + + __m128i vals = _encode_sRGB_sse2_mul255(vec); // every 32-bit lane now holds a value in 0..255 + into.r = _mm_extract_epi16(vals, 0); // low 16-bit word of 32-bit lane 0 + into.g = _mm_extract_epi16(vals, 2); // low 16-bit word of 32-bit lane 1 + into.b = _mm_extract_epi16(vals, 4); // low 16-bit word of 32-bit lane 2 + into_alpha = _mm_extract_epi16(vals, 6); // lane 3 took the linear (x255) path +} + +#else +// Somehow we're still compiling this without SSE2 support. We'll +// still have to define these functions, but emit a warning that the +// build system isn't configured properly. +#warning convert_srgb_sse2.cxx is being compiled without SSE2 support! 
+ +unsigned char +encode_sRGB_uchar_sse2(float val) { + return encode_sRGB_uchar(val); // forwards to the non-suffixed encoder so the symbol still exists +} + +void +encode_sRGB_uchar_sse2(const LColorf &color, xel &into) { + encode_sRGB_uchar(color, into); // same forwarding for the RGB overload +} + +void +encode_sRGB_uchar_sse2(const LColorf &color, xel &into, xelval &into_alpha) { + encode_sRGB_uchar(color, into, into_alpha); // same forwarding for the RGBA overload +} + +#endif diff --git a/panda/src/pnmimage/p3pnmimage_composite1.cxx b/panda/src/pnmimage/p3pnmimage_composite1.cxx index 0e290cbc95..85dfe7f0a1 100644 --- a/panda/src/pnmimage/p3pnmimage_composite1.cxx +++ b/panda/src/pnmimage/p3pnmimage_composite1.cxx @@ -1,4 +1,5 @@ #include "config_pnmimage.cxx" +#include "convert_srgb.cxx" #include "pfmFile.cxx" #include "pnm-image-filter.cxx" #include "pnmbitio.cxx" diff --git a/panda/src/tinydisplay/zbuffer.h b/panda/src/tinydisplay/zbuffer.h index a4c8a2015c..46190ef4e6 100644 --- a/panda/src/tinydisplay/zbuffer.h +++ b/panda/src/tinydisplay/zbuffer.h @@ -75,7 +75,7 @@ typedef unsigned int ZPOINT; ((((unsigned int)(a) << 24) & 0xff000000) | (((unsigned int)(r) << 16) & 0xff0000) | (((unsigned int)(g) << 8) & 0xff00) | (unsigned int)(b)) #define SRGB_TO_PIXEL(r,g,b) \ - ((encode_sRGB[(unsigned int)(r) >> 4] << 16) | (encode_sRGB10[(unsigned int)(g) >> 4] << 8) | (encode_sRGB[(unsigned int)(b) >> 4])) + ((encode_sRGB[(unsigned int)(r) >> 4] << 16) | (encode_sRGB[(unsigned int)(g) >> 4] << 8) | (encode_sRGB[(unsigned int)(b) >> 4])) #define SRGBA_TO_PIXEL(r,g,b,a) \ ((((unsigned int)(a) << 16) & 0xff000000) | (encode_sRGB[(unsigned int)(r) >> 4] << 16) | (encode_sRGB[(unsigned int)(g) >> 4] << 8) | (encode_sRGB[(unsigned int)(b) >> 4]))