mirror of
https://github.com/panda3d/panda3d.git
synced 2025-10-04 02:42:49 -04:00
SSE2-enhanced sRGB encode (with runtime CPU detection)
This commit is contained in:
parent
e33cac03fe
commit
04d4309b14
@ -3204,7 +3204,8 @@ write_function_instance(ostream &out, InterfaceMaker::Object *obj,
|
||||
}
|
||||
extra_convert += "PyObject *" + param_name + "_long = PyNumber_Long(" + param_name + ");";
|
||||
extra_param_check += " && " + param_name + "_long != NULL";
|
||||
pexpr_string = "PyLong_AsUnsignedLongLong(" + param_name + "_long)";
|
||||
pexpr_string = "(" + type->get_local_name(&parser) + ")" +
|
||||
"PyLong_AsUnsignedLongLong(" + param_name + "_long)";
|
||||
extra_cleanup += "Py_XDECREF(" + param_name + "_long);";
|
||||
expected_params += "unsigned long long";
|
||||
++num_params;
|
||||
@ -3219,7 +3220,8 @@ write_function_instance(ostream &out, InterfaceMaker::Object *obj,
|
||||
}
|
||||
extra_convert += "PyObject *" + param_name + "_long = PyNumber_Long(" + param_name + ");";
|
||||
extra_param_check += " && " + param_name + "_long != NULL";
|
||||
pexpr_string = "PyLong_AsLongLong(" + param_name + "_long)";
|
||||
pexpr_string = "(" + type->get_local_name(&parser) + ")" +
|
||||
"PyLong_AsLongLong(" + param_name + "_long)";
|
||||
extra_cleanup += "Py_XDECREF(" + param_name + "_long);";
|
||||
expected_params += "long long";
|
||||
++num_params;
|
||||
@ -3234,7 +3236,8 @@ write_function_instance(ostream &out, InterfaceMaker::Object *obj,
|
||||
}
|
||||
extra_convert += "PyObject *" + param_name + "_long = PyNumber_Long(" + param_name + ");";
|
||||
extra_param_check += " && " + param_name + "_long != NULL";
|
||||
pexpr_string = "PyLong_AsUnsignedLong(" + param_name + "_long)";
|
||||
pexpr_string = "(" + type->get_local_name(&parser) + ")" +
|
||||
"PyLong_AsUnsignedLong(" + param_name + "_long)";
|
||||
extra_cleanup += "Py_XDECREF(" + param_name + "_long);";
|
||||
expected_params += "unsigned int";
|
||||
++num_params;
|
||||
|
@ -949,7 +949,7 @@ def CompileCxx(obj,src,opts):
|
||||
if PkgSkip("TOUCHINPUT") == 0:
|
||||
cmd += "/DWINVER=0x601 "
|
||||
cmd += "/Fo" + obj + " /nologo /c"
|
||||
if (GetTargetArch() != 'x64' and PkgSkip("SSE2") == 0):
|
||||
if GetTargetArch() != 'x64' and (not PkgSkip("SSE2") or 'SSE2' in opts):
|
||||
cmd += " /arch:SSE2"
|
||||
for x in ipath: cmd += " /I" + x
|
||||
for (opt,dir) in INCDIRECTORIES:
|
||||
@ -1160,7 +1160,7 @@ def CompileCxx(obj,src,opts):
|
||||
if optlevel >= 4 or GetTarget() == "android":
|
||||
cmd += " -fno-rtti"
|
||||
|
||||
if PkgSkip("SSE2") == 0 and not arch.startswith("arm"):
|
||||
if ('SSE2' in opts or not PkgSkip("SSE2")) and not arch.startswith("arm"):
|
||||
cmd += " -msse2"
|
||||
|
||||
if optlevel >= 3:
|
||||
@ -1705,7 +1705,7 @@ def RunGenPyCode(target, inputs, opts):
|
||||
if (PkgSkip("PYTHON") != 0):
|
||||
return
|
||||
|
||||
cmdstr = sys.executable + " "
|
||||
cmdstr = BracketNameWithQuotes(SDK["PYTHONEXEC"]) + " "
|
||||
if sys.version_info >= (2, 6):
|
||||
cmdstr += "-B "
|
||||
|
||||
@ -1729,7 +1729,7 @@ def RunGenPyCode(target, inputs, opts):
|
||||
def FreezePy(target, inputs, opts):
|
||||
assert len(inputs) > 0
|
||||
# Make sure this function isn't called before genpycode is run.
|
||||
cmdstr = sys.executable + " "
|
||||
cmdstr = BracketNameWithQuotes(SDK["PYTHONEXEC"]) + " "
|
||||
if sys.version_info >= (2, 6):
|
||||
cmdstr += "-B "
|
||||
|
||||
@ -1757,7 +1757,7 @@ def FreezePy(target, inputs, opts):
|
||||
def Package(target, inputs, opts):
|
||||
assert len(inputs) == 1
|
||||
# Invoke the ppackage script.
|
||||
command = sys.executable + " "
|
||||
command = BracketNameWithQuotes(SDK["PYTHONEXEC"]) + " "
|
||||
if GetOptimizeOption(opts) >= 4:
|
||||
command += "-OO "
|
||||
|
||||
@ -3192,6 +3192,7 @@ if (not RUNTIME):
|
||||
OPTS=['DIR:panda/src/pnmimage', 'BUILDING:PANDA', 'ZLIB']
|
||||
TargetAdd('p3pnmimage_composite1.obj', opts=OPTS, input='p3pnmimage_composite1.cxx')
|
||||
TargetAdd('p3pnmimage_composite2.obj', opts=OPTS, input='p3pnmimage_composite2.cxx')
|
||||
TargetAdd('p3pnmimage_convert_srgb_sse2.obj', opts=OPTS+['SSE2'], input='convert_srgb_sse2.cxx')
|
||||
|
||||
OPTS=['DIR:panda/src/pnmimage', 'ZLIB']
|
||||
IGATEFILES=GetDirectoryContents('panda/src/pnmimage', ["*.h", "*_composite*.cxx"])
|
||||
@ -3621,6 +3622,7 @@ if (not RUNTIME):
|
||||
TargetAdd('libpanda.dll', input='p3pnmimagetypes_composite2.obj')
|
||||
TargetAdd('libpanda.dll', input='p3pnmimage_composite1.obj')
|
||||
TargetAdd('libpanda.dll', input='p3pnmimage_composite2.obj')
|
||||
TargetAdd('libpanda.dll', input='p3pnmimage_convert_srgb_sse2.obj')
|
||||
TargetAdd('libpanda.dll', input='p3text_composite1.obj')
|
||||
TargetAdd('libpanda.dll', input='p3text_composite2.obj')
|
||||
TargetAdd('libpanda.dll', input='p3tform_composite1.obj')
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "pbitops.h"
|
||||
#include "streamReader.h"
|
||||
#include "texturePeeker.h"
|
||||
#include "convert_srgb.h"
|
||||
|
||||
#ifdef HAVE_SQUISH
|
||||
#include <squish.h>
|
||||
@ -131,46 +132,6 @@ struct DDSHeader {
|
||||
DDSCaps2 caps;
|
||||
};
|
||||
|
||||
// This table is used for converting unsigned char texture values in an sRGB
|
||||
// texture to linear RGB values, for use in mipmap generation.
|
||||
static float srgb_to_lrgbf[256] = {0.000000f, 0.000304f, 0.000607f, 0.000911f,
|
||||
0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
|
||||
0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f,
|
||||
0.006049f, 0.006512f, 0.006995f, 0.007499f, 0.008023f, 0.008568f, 0.009134f,
|
||||
0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f,
|
||||
0.014444f, 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f,
|
||||
0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f, 0.025187f, 0.026241f,
|
||||
0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f,
|
||||
0.035601f, 0.036889f, 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f,
|
||||
0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f, 0.054480f,
|
||||
0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f,
|
||||
0.068478f, 0.070360f, 0.072272f, 0.074214f, 0.076185f, 0.078187f, 0.080220f,
|
||||
0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
|
||||
0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f,
|
||||
0.114435f, 0.116971f, 0.119538f, 0.122139f, 0.124772f, 0.127438f, 0.130136f,
|
||||
0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f,
|
||||
0.152926f, 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f,
|
||||
0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f, 0.191202f, 0.194618f,
|
||||
0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f,
|
||||
0.223228f, 0.226966f, 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f,
|
||||
0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f, 0.274677f,
|
||||
0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f,
|
||||
0.309469f, 0.313989f, 0.318547f, 0.323143f, 0.327778f, 0.332452f, 0.337164f,
|
||||
0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
|
||||
0.376262f, 0.381326f, 0.386429f, 0.391572f, 0.396755f, 0.401978f, 0.407240f,
|
||||
0.412543f, 0.417885f, 0.423268f, 0.428690f, 0.434154f, 0.439657f, 0.445201f,
|
||||
0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473531f, 0.479320f, 0.485150f,
|
||||
0.491021f, 0.496933f, 0.502886f, 0.508881f, 0.514918f, 0.520996f, 0.527115f,
|
||||
0.533276f, 0.539479f, 0.545724f, 0.552011f, 0.558340f, 0.564712f, 0.571125f,
|
||||
0.577580f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f,
|
||||
0.623960f, 0.630757f, 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f,
|
||||
0.672443f, 0.679542f, 0.686685f, 0.693872f, 0.701102f, 0.708376f, 0.715694f,
|
||||
0.723055f, 0.730461f, 0.737910f, 0.745404f, 0.752942f, 0.760525f, 0.768151f,
|
||||
0.775822f, 0.783538f, 0.791298f, 0.799103f, 0.806952f, 0.814847f, 0.822786f,
|
||||
0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
|
||||
0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f,
|
||||
0.947307f, 0.955973f, 0.964686f, 0.973445f, 0.982251f, 0.991102f, 1.000000f};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: Texture::Constructor
|
||||
// Access: Published
|
||||
@ -6990,7 +6951,13 @@ do_filter_2d_mipmap_pages(const CData *cdata,
|
||||
// We currently only support sRGB mipmap generation for
|
||||
// unsigned byte textures, due to our use of a lookup table.
|
||||
nassertv(cdata->_component_type == T_unsigned_byte);
|
||||
filter_component = &filter_2d_unsigned_byte_srgb;
|
||||
|
||||
if (has_sse2_sRGB_encode()) {
|
||||
filter_component = &filter_2d_unsigned_byte_srgb_sse2;
|
||||
} else {
|
||||
filter_component = &filter_2d_unsigned_byte_srgb;
|
||||
}
|
||||
|
||||
// Alpha is always linear.
|
||||
filter_alpha = &filter_2d_unsigned_byte;
|
||||
|
||||
@ -7140,7 +7107,13 @@ do_filter_3d_mipmap_level(const CData *cdata,
|
||||
// We currently only support sRGB mipmap generation for
|
||||
// unsigned byte textures, due to our use of a lookup table.
|
||||
nassertv(cdata->_component_type == T_unsigned_byte);
|
||||
filter_component = &filter_3d_unsigned_byte_srgb;
|
||||
|
||||
if (has_sse2_sRGB_encode()) {
|
||||
filter_component = &filter_3d_unsigned_byte_srgb_sse2;
|
||||
} else {
|
||||
filter_component = &filter_3d_unsigned_byte_srgb;
|
||||
}
|
||||
|
||||
// Alpha is always linear.
|
||||
filter_alpha = &filter_3d_unsigned_byte;
|
||||
|
||||
@ -7385,18 +7358,32 @@ filter_2d_unsigned_byte(unsigned char *&p, const unsigned char *&q,
|
||||
void Texture::
|
||||
filter_2d_unsigned_byte_srgb(unsigned char *&p, const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size) {
|
||||
float result = (srgb_to_lrgbf[q[0]] +
|
||||
srgb_to_lrgbf[q[pixel_size]] +
|
||||
srgb_to_lrgbf[q[row_size]] +
|
||||
srgb_to_lrgbf[q[pixel_size + row_size]]) / 4.0f;
|
||||
float result = (decode_sRGB_float(q[0]) +
|
||||
decode_sRGB_float(q[pixel_size]) +
|
||||
decode_sRGB_float(q[row_size]) +
|
||||
decode_sRGB_float(q[pixel_size + row_size]));
|
||||
|
||||
// This is based on the formula out of the EXT_texture_sRGB
|
||||
// specification, except the factors are multiplied with 255.0f.
|
||||
if (result < 0.0031308f) {
|
||||
*p = (unsigned char)(result * 3294.6f);
|
||||
} else {
|
||||
*p = (unsigned char)(269.025f * powf(result, 0.41666f) - 14.025f);
|
||||
}
|
||||
*p = encode_sRGB_uchar(result * 0.25f);
|
||||
++p;
|
||||
++q;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: Texture::filter_2d_unsigned_byte_srgb_sse2
|
||||
// Access: Public, Static
|
||||
// Description: Averages a 2x2 block of pixel components into a
|
||||
// single pixel component, for producing the next mipmap
|
||||
// level. Increments p and q to the next component.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
void Texture::
|
||||
filter_2d_unsigned_byte_srgb_sse2(unsigned char *&p, const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size) {
|
||||
float result = (decode_sRGB_float(q[0]) +
|
||||
decode_sRGB_float(q[pixel_size]) +
|
||||
decode_sRGB_float(q[row_size]) +
|
||||
decode_sRGB_float(q[pixel_size + row_size]));
|
||||
|
||||
*p = encode_sRGB_uchar_sse2(result * 0.25f);
|
||||
++p;
|
||||
++q;
|
||||
}
|
||||
@ -7470,22 +7457,40 @@ filter_3d_unsigned_byte(unsigned char *&p, const unsigned char *&q,
|
||||
void Texture::
|
||||
filter_3d_unsigned_byte_srgb(unsigned char *&p, const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size, size_t page_size) {
|
||||
float result = (srgb_to_lrgbf[q[0]] +
|
||||
srgb_to_lrgbf[q[pixel_size]] +
|
||||
srgb_to_lrgbf[q[row_size]] +
|
||||
srgb_to_lrgbf[q[pixel_size + row_size]] +
|
||||
srgb_to_lrgbf[q[page_size]] +
|
||||
srgb_to_lrgbf[q[pixel_size + page_size]] +
|
||||
srgb_to_lrgbf[q[row_size + page_size]] +
|
||||
srgb_to_lrgbf[q[pixel_size + row_size + page_size]]) / 8.0f;
|
||||
float result = (decode_sRGB_float(q[0]) +
|
||||
decode_sRGB_float(q[pixel_size]) +
|
||||
decode_sRGB_float(q[row_size]) +
|
||||
decode_sRGB_float(q[pixel_size + row_size]) +
|
||||
decode_sRGB_float(q[page_size]) +
|
||||
decode_sRGB_float(q[pixel_size + page_size]) +
|
||||
decode_sRGB_float(q[row_size + page_size]) +
|
||||
decode_sRGB_float(q[pixel_size + row_size + page_size]));
|
||||
|
||||
// This is based on the formula out of the EXT_texture_sRGB
|
||||
// specification, except the factors are multiplied with 255.0f.
|
||||
if (result < 0.0031308f) {
|
||||
*p = (unsigned char)(result * 3294.6f);
|
||||
} else {
|
||||
*p = (unsigned char)(269.025f * powf(result, 0.41666f) - 14.025f);
|
||||
}
|
||||
*p = encode_sRGB_uchar(result * 0.125f);
|
||||
++p;
|
||||
++q;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: Texture::filter_3d_unsigned_byte_srgb_sse2
|
||||
// Access: Public, Static
|
||||
// Description: Averages a 2x2x2 block of pixel components into a
|
||||
// single pixel component, for producing the next mipmap
|
||||
// level. Increments p and q to the next component.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
void Texture::
|
||||
filter_3d_unsigned_byte_srgb_sse2(unsigned char *&p, const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size, size_t page_size) {
|
||||
float result = (decode_sRGB_float(q[0]) +
|
||||
decode_sRGB_float(q[pixel_size]) +
|
||||
decode_sRGB_float(q[row_size]) +
|
||||
decode_sRGB_float(q[pixel_size + row_size]) +
|
||||
decode_sRGB_float(q[page_size]) +
|
||||
decode_sRGB_float(q[pixel_size + page_size]) +
|
||||
decode_sRGB_float(q[row_size + page_size]) +
|
||||
decode_sRGB_float(q[pixel_size + row_size + page_size]));
|
||||
|
||||
*p = encode_sRGB_uchar_sse2(result * 0.125f);
|
||||
++p;
|
||||
++q;
|
||||
}
|
||||
|
@ -749,6 +749,9 @@ private:
|
||||
static void filter_2d_unsigned_byte_srgb(unsigned char *&p,
|
||||
const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size);
|
||||
static void filter_2d_unsigned_byte_srgb_sse2(unsigned char *&p,
|
||||
const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size);
|
||||
static void filter_2d_unsigned_short(unsigned char *&p,
|
||||
const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size);
|
||||
@ -763,6 +766,10 @@ private:
|
||||
const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size,
|
||||
size_t page_size);
|
||||
static void filter_3d_unsigned_byte_srgb_sse2(unsigned char *&p,
|
||||
const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size,
|
||||
size_t page_size);
|
||||
static void filter_3d_unsigned_short(unsigned char *&p,
|
||||
const unsigned char *&q,
|
||||
size_t pixel_size, size_t row_size,
|
||||
|
175
panda/src/pnmimage/convert_srgb.I
Normal file
175
panda/src/pnmimage/convert_srgb.I
Normal file
@ -0,0 +1,175 @@
|
||||
// Filename: convert_srgb.I
|
||||
// Created by: rdb (29Oct14)
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// PANDA 3D SOFTWARE
|
||||
// Copyright (c) Carnegie Mellon University. All rights reserved.
|
||||
//
|
||||
// All use of this software is subject to the terms of the revised BSD
|
||||
// license. You should have received a copy of this license along
|
||||
// with this source code in a file named "LICENSE."
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: decode_sRGB_float
|
||||
// Description: Decodes the sRGB-encoded unsigned char value to
|
||||
// a linearized float in the range 0-1.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
CONSTEXPR float decode_sRGB_float(unsigned char val) {
|
||||
// to_linear_float_table is declared with 256 entries; val indexes it
// directly, so no arithmetic is done here.
return to_linear_float_table[val];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: decode_sRGB_float
|
||||
// Description: Decodes the sRGB-encoded floating-point value in
|
||||
// the range 0-1 to a linearized float in the range
|
||||
// 0-1. Inputs outside this range produce invalid
|
||||
// results.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE float decode_sRGB_float(float val) {
  // Up to and including 0.04045 the encoded value is decoded by a plain
  // division by 12.92.
  if (val <= 0.04045f) {
    return val * (1.f / 12.92f);
  }

  // Everything above that threshold goes through the 2.4 power curve.
  const float base = (val + 0.055f) * (1.f / 1.055f);
  return cpow(base, 2.4f);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: decode_sRGB_uchar
|
||||
// Description: Decodes the sRGB-encoded unsigned char value to
|
||||
// a linearized unsigned char value.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
CONSTEXPR unsigned char decode_sRGB_uchar(unsigned char val) {
|
||||
// to_linear_uchar_table is declared with 256 entries; val indexes it
// directly, so no arithmetic is done here.
return to_linear_uchar_table[val];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: decode_sRGB_uchar
|
||||
// Description: Decodes the sRGB-encoded floating-point value in
|
||||
// the range 0-1 to a linearized unsigned char value.
|
||||
// Inputs outside this range are clamped.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE unsigned char decode_sRGB_uchar(float val) {
  if (val <= 0.04045f) {
    // Linear segment.  Negative inputs are raised to zero first; the result
    // is scaled to 0-255 and rounded by adding 0.5 before truncation.
    return (unsigned char)(max(0.f, val) * (255.f / 12.92f) + 0.5f);
  }

  // Power segment.  Inputs above one are lowered to one first.
  const float base = (min(val, 1.f) + 0.055f) * (1.f / 1.055f);
  return (unsigned char)(cpow(base, 2.4f) * 255.f + 0.5f);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: encode_sRGB_float
|
||||
// Description: Encodes the linearized unsigned char value to an
|
||||
// sRGB-encoded floating-point value in the range 0-1.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE float
encode_sRGB_float(unsigned char val) {
  // This seems like a very unlikely use case, so there is no look-up table
  // for it; the value is computed on every call.

  // Zero is special-cased because the power formula below would yield
  // -0.055 for it instead of 0.
  if (val == 0) {
    return 0.f;
  }

  // Every non-zero input is at least 1/255, which lies above the 0.0031308
  // threshold used by encode_sRGB_float(float), so only the power formula is
  // needed.  All constants are float literals so that the expression is
  // evaluated in single precision rather than being promoted to double and
  // narrowed again on return.
  return 1.055f * cpow((float)val * (1.f / 255.f), 0.41666f) - 0.055f;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: encode_sRGB_float
|
||||
// Description: Encodes the linearized floating-point value in the
|
||||
// range 0-1 to an sRGB-encoded float in the range
|
||||
// 0-1. Inputs outside this range produce invalid
|
||||
// results.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE float
encode_sRGB_float(float val) {
  // Below the 0.0031308 threshold the encoding is a plain multiplication.
  if (val < 0.0031308f) {
    return val * 12.92f;
  }

  // At or above the threshold the power formula applies.  The offset is
  // written as a float literal so that the expression is evaluated in single
  // precision rather than being promoted to double and narrowed again on
  // return.
  return 1.055f * cpow(val, 0.41666f) - 0.055f;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: encode_sRGB_uchar
|
||||
// Description: Encodes the linearized unsigned char value to an
|
||||
// sRGB-encoded unsigned char value.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
CONSTEXPR unsigned char
|
||||
encode_sRGB_uchar(unsigned char val) {
|
||||
// to_srgb8_table is declared with 256 entries; val indexes it directly, so
// no arithmetic is done here.
return to_srgb8_table[val];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: encode_sRGB_uchar
|
||||
// Description: Encodes the linearized floating-point value in the
|
||||
// range 0-1 to an sRGB-encoded unsigned char value.
|
||||
// Inputs outside this range are clamped.
|
||||
//
|
||||
// When SSE2 support is known at compile time, this
|
||||
// automatically uses an optimized version. Otherwise,
|
||||
// it does not attempt runtime CPU detection. If you
|
||||
// know that SSE2 is supported (ie. if the function
|
||||
// has_sse2_sRGB_encode() returns true) you should
|
||||
// call encode_sRGB_uchar_sse2 instead.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE unsigned char
encode_sRGB_uchar(float val) {
#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64)
  // SSE2 is enabled for this translation unit, so hand the value straight to
  // the optimized approximation, which has more than enough accuracy for an
  // unsigned char.
  return encode_sRGB_uchar_sse2(val);
#else
  // Scalar fallback.  The constants are the usual sRGB ones pre-multiplied by
  // 255: 3294.6 = 12.92 * 255 and 269.025 = 1.055 * 255.
  if (val < 0.0031308f) {
    // Linear segment; negative inputs are raised to zero, and 0.5 is added so
    // that the truncating cast rounds to nearest.
    return (unsigned char) (max(0.f, val) * 3294.6f + 0.5f);
  }

  // Power segment; inputs above one are lowered to one.  The subtracted
  // 13.525 is the 14.025 offset (0.055 * 255) with the 0.5 rounding term
  // already folded in.
  return (unsigned char) (269.025f * cpow(min(val, 1.f), 0.41666f) - 13.525f);
#endif
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: encode_sRGB_uchar
|
||||
// Description: Encodes the linearized floating-point color value
|
||||
// to an sRGB-encoded xel in the range 0-255.
|
||||
//
|
||||
// When SSE2 support is known at compile time, this
|
||||
// automatically uses an optimized version. Otherwise,
|
||||
// it does not attempt runtime CPU detection. If you
|
||||
// know that SSE2 is supported (ie. if the function
|
||||
// has_sse2_sRGB_encode() returns true) you should
|
||||
// call encode_sRGB_uchar_sse2 instead.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE void
encode_sRGB_uchar(const LColorf &color, xel &into) {
#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64)
  // This translation unit is built with SSE2, so the optimized routine can be
  // called unconditionally.
  encode_sRGB_uchar_sse2(color, into);
#else
  // Scalar fallback: encode the first three components one at a time with the
  // float overload, then store them into the xel.
  const unsigned char channel0 = encode_sRGB_uchar(color[0]);
  const unsigned char channel1 = encode_sRGB_uchar(color[1]);
  const unsigned char channel2 = encode_sRGB_uchar(color[2]);

  PPM_ASSIGN(into, channel0, channel1, channel2);
#endif
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Function: encode_sRGB_uchar
|
||||
// Description: Encodes the linearized floating-point color value
|
||||
// to an sRGB-encoded xel and alpha in the range 0-255.
|
||||
// The alpha value is not sRGB-encoded.
|
||||
//
|
||||
// When SSE2 support is known at compile time, this
|
||||
// automatically uses an optimized version. Otherwise,
|
||||
// it does not attempt runtime CPU detection. If you
|
||||
// know that SSE2 is supported (ie. if the function
|
||||
// has_sse2_sRGB_encode() returns true) you should
|
||||
// call encode_sRGB_uchar_sse2 instead.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
INLINE void
encode_sRGB_uchar(const LColorf &color, xel &into, xelval &into_alpha) {
#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64)
  // This translation unit is built with SSE2, so the optimized routine can be
  // called unconditionally.
  encode_sRGB_uchar_sse2(color, into, into_alpha);
#else
  // Scalar fallback: encode the first three components one at a time with the
  // float overload, then store them into the xel.
  const unsigned char channel0 = encode_sRGB_uchar(color[0]);
  const unsigned char channel1 = encode_sRGB_uchar(color[1]);
  const unsigned char channel2 = encode_sRGB_uchar(color[2]);

  PPM_ASSIGN(into, channel0, channel1, channel2);

  // The fourth component is not sRGB-encoded: it is only scaled by 255 and
  // rounded by adding 0.5 before the truncating cast.  No clamping is applied
  // to it here.
  into_alpha = (xelval) (color[3] * 255.f + 0.5f);
#endif
}
|
165
panda/src/pnmimage/convert_srgb.cxx
Normal file
165
panda/src/pnmimage/convert_srgb.cxx
Normal file
@ -0,0 +1,165 @@
|
||||
// Filename: convert_srgb.cxx
|
||||
// Created by: rdb (13Nov14)
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// PANDA 3D SOFTWARE
|
||||
// Copyright (c) Carnegie Mellon University. All rights reserved.
|
||||
//
|
||||
// All use of this software is subject to the terms of the revised BSD
|
||||
// license. You should have received a copy of this license along
|
||||
// with this source code in a file named "LICENSE."
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "convert_srgb.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN 1
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Lookup tables for converting from unsigned char formats.
|
||||
ALIGN_64BYTE const
|
||||
unsigned char to_srgb8_table[256] = { 0x00, 0x0d, 0x16, 0x1c, 0x22, 0x26, 0x2a,
|
||||
0x2e, 0x32, 0x35, 0x38, 0x3b, 0x3d, 0x40, 0x42, 0x45, 0x47, 0x49, 0x4b, 0x4d,
|
||||
0x4f, 0x51, 0x53, 0x55, 0x56, 0x58, 0x5a, 0x5c, 0x5d, 0x5f, 0x60, 0x62, 0x63,
|
||||
0x65, 0x66, 0x68, 0x69, 0x6a, 0x6c, 0x6d, 0x6e, 0x70, 0x71, 0x72, 0x73, 0x75,
|
||||
0x76, 0x77, 0x78, 0x79, 0x7a, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
|
||||
0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
|
||||
0x91, 0x92, 0x93, 0x94, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b,
|
||||
0x9c, 0x9d, 0x9e, 0x9f, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa3, 0xa4, 0xa5, 0xa6,
|
||||
0xa7, 0xa7, 0xa8, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xad, 0xad, 0xae, 0xaf, 0xaf,
|
||||
0xb0, 0xb1, 0xb2, 0xb2, 0xb3, 0xb4, 0xb4, 0xb5, 0xb6, 0xb6, 0xb7, 0xb8, 0xb9,
|
||||
0xb9, 0xba, 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf, 0xc0, 0xc0, 0xc1,
|
||||
0xc2, 0xc2, 0xc3, 0xc4, 0xc4, 0xc5, 0xc5, 0xc6, 0xc7, 0xc7, 0xc8, 0xc8, 0xc9,
|
||||
0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xcd, 0xce, 0xce, 0xcf, 0xd0, 0xd0, 0xd1,
|
||||
0xd1, 0xd2, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5, 0xd6, 0xd6, 0xd7, 0xd7, 0xd8,
|
||||
0xd8, 0xd9, 0xda, 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde, 0xde, 0xdf,
|
||||
0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe2, 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6,
|
||||
0xe6, 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xe9, 0xea, 0xea, 0xeb, 0xeb, 0xec, 0xec,
|
||||
0xed, 0xed, 0xee, 0xee, 0xee, 0xef, 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2,
|
||||
0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8,
|
||||
0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe,
|
||||
0xff, 0xff};
|
||||
|
||||
ALIGN_64BYTE const
|
||||
unsigned char to_linear_uchar_table[256] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02,
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07,
|
||||
0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x0a, 0x0b,
|
||||
0x0b, 0x0c, 0x0c, 0x0c, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, 0x10,
|
||||
0x11, 0x11, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14, 0x14, 0x15, 0x16, 0x16, 0x17,
|
||||
0x17, 0x18, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1e,
|
||||
0x1f, 0x20, 0x20, 0x21, 0x22, 0x23, 0x23, 0x24, 0x25, 0x25, 0x26, 0x27, 0x28,
|
||||
0x29, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33,
|
||||
0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
|
||||
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4c, 0x4d,
|
||||
0x4e, 0x4f, 0x50, 0x51, 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x5a, 0x5b, 0x5c,
|
||||
0x5d, 0x5f, 0x60, 0x61, 0x63, 0x64, 0x65, 0x67, 0x68, 0x69, 0x6b, 0x6c, 0x6d,
|
||||
0x6f, 0x70, 0x72, 0x73, 0x74, 0x76, 0x77, 0x79, 0x7a, 0x7c, 0x7d, 0x7f, 0x80,
|
||||
0x82, 0x83, 0x85, 0x86, 0x88, 0x8a, 0x8b, 0x8d, 0x8e, 0x90, 0x92, 0x93, 0x95,
|
||||
0x97, 0x98, 0x9a, 0x9c, 0x9d, 0x9f, 0xa1, 0xa3, 0xa4, 0xa6, 0xa8, 0xaa, 0xab,
|
||||
0xad, 0xaf, 0xb1, 0xb3, 0xb5, 0xb7, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4,
|
||||
0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
|
||||
0xe0, 0xe2, 0xe5, 0xe7, 0xe9, 0xeb, 0xed, 0xef, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa,
|
||||
0xfd, 0xff};
|
||||
|
||||
ALIGN_64BYTE
|
||||
const float to_linear_float_table[256] = { 0, 0.000304f, 0.000607f, 0.000911f,
|
||||
0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
|
||||
0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f,
|
||||
0.006049f, 0.006512f, 0.006995f, 0.007499f, 0.008023f, 0.008568f, 0.009134f,
|
||||
0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f,
|
||||
0.014444f, 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f,
|
||||
0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f, 0.025187f, 0.026241f,
|
||||
0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f,
|
||||
0.035601f, 0.036889f, 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f,
|
||||
0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f, 0.054480f,
|
||||
0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f,
|
||||
0.068478f, 0.070360f, 0.072272f, 0.074214f, 0.076185f, 0.078187f, 0.080220f,
|
||||
0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
|
||||
0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f,
|
||||
0.114435f, 0.116971f, 0.119538f, 0.122139f, 0.124772f, 0.127438f, 0.130136f,
|
||||
0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f,
|
||||
0.152926f, 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f,
|
||||
0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f, 0.191202f, 0.194618f,
|
||||
0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f,
|
||||
0.223228f, 0.226966f, 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f,
|
||||
0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f, 0.274677f,
|
||||
0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f,
|
||||
0.309469f, 0.313989f, 0.318547f, 0.323143f, 0.327778f, 0.332452f, 0.337164f,
|
||||
0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
|
||||
0.376262f, 0.381326f, 0.386429f, 0.391572f, 0.396755f, 0.401978f, 0.407240f,
|
||||
0.412543f, 0.417885f, 0.423268f, 0.428690f, 0.434154f, 0.439657f, 0.445201f,
|
||||
0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473531f, 0.479320f, 0.485150f,
|
||||
0.491021f, 0.496933f, 0.502886f, 0.508881f, 0.514918f, 0.520996f, 0.527115f,
|
||||
0.533276f, 0.539479f, 0.545724f, 0.552011f, 0.558340f, 0.564712f, 0.571125f,
|
||||
0.577580f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f,
|
||||
0.623960f, 0.630757f, 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f,
|
||||
0.672443f, 0.679542f, 0.686685f, 0.693872f, 0.701102f, 0.708376f, 0.715694f,
|
||||
0.723055f, 0.730461f, 0.737910f, 0.745404f, 0.752942f, 0.760525f, 0.768151f,
|
||||
0.775822f, 0.783538f, 0.791298f, 0.799103f, 0.806952f, 0.814847f, 0.822786f,
|
||||
0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
|
||||
0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f,
|
||||
0.947307f, 0.955973f, 0.964686f, 0.973445f, 0.982251f, 0.991102f, 1.000000f};
|
||||
|
||||
|
||||
#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64)
// This file is being compiled with SSE2 enabled, so the answer is known
// without asking the processor.
bool
has_sse2_sRGB_encode() {
  return true;
}

#else
// SSE2 is not enabled at compile time; find out at run time instead.

// Asks the compiler intrinsic (GCC-compatible compilers) or the operating
// system (Windows) whether SSE2 is present.  Returns false when neither
// mechanism is available.
static bool
query_sse2_support() {
#if defined(__GNUC__)
  // __get_cpuid returns 1 on success; mask 0x04000000 selects bit 26 of the
  // fourth output register of CPUID leaf 1.
  unsigned int eax, ebx, ecx, edx;
  return __get_cpuid(1, &eax, &ebx, &ecx, &edx) == 1 && (edx & 0x04000000) != 0;

#elif defined(_WIN32)
  return IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) != FALSE;

#else
  return false;
#endif
}

bool
has_sse2_sRGB_encode() {
  // The query runs once; later calls reuse the stored answer.
  static const bool has_support = query_sse2_support();

  if (pnmimage_cat.is_debug()) {
    // Report the outcome a single time, and only when debug output is on.
    static bool checked = false;
    if (!checked) {
#if defined(__GNUC__) || defined(_WIN32)
      const char *const finding = has_support
        ? "Runtime detection reports SSE2 instructions available: "
        : "Runtime detection reports SSE2 instructions unavailable: ";
      const char *const outcome = has_support
        ? "SSE2-optimized sRGB encoding routines enabled.\n"
        : "SSE2-optimized sRGB encoding routines disabled.\n";
#else
      const char *const finding =
        "No runtime detection mechanism for SSE2 instructions available: ";
      const char *const outcome =
        "SSE2-optimized sRGB encoding routines disabled.\n";
#endif
      pnmimage_cat.debug()
        << finding
        << outcome;
      checked = true;
    }
  }

  return has_support;
}

#endif  // __SSE2__
|
59
panda/src/pnmimage/convert_srgb.h
Normal file
59
panda/src/pnmimage/convert_srgb.h
Normal file
@ -0,0 +1,59 @@
|
||||
// Filename: convert_srgb.h
|
||||
// Created by: rdb (13Nov14)
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// PANDA 3D SOFTWARE
|
||||
// Copyright (c) Carnegie Mellon University. All rights reserved.
|
||||
//
|
||||
// All use of this software is subject to the terms of the revised BSD
|
||||
// license. You should have received a copy of this license along
|
||||
// with this source code in a file named "LICENSE."
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef CONVERT_SRGB_H
|
||||
#define CONVERT_SRGB_H
|
||||
|
||||
#include "pandabase.h"
|
||||
#include "luse.h"
|
||||
#include "pnmimage_base.h"
|
||||
|
||||
// The below functions can encode and decode sRGB colors in various
|
||||
// representations. Some of them are implemented using look-up tables,
|
||||
// some others using SSE2 intrinsics.
|
||||
extern EXPCL_PANDA_PNMIMAGE const unsigned char to_srgb8_table[256];
|
||||
extern EXPCL_PANDA_PNMIMAGE const unsigned char to_linear_uchar_table[256];
|
||||
extern EXPCL_PANDA_PNMIMAGE const float to_linear_float_table[256];
|
||||
|
||||
EXPCL_PANDA_PNMIMAGE CONSTEXPR float decode_sRGB_float(unsigned char val);
|
||||
EXPCL_PANDA_PNMIMAGE INLINE float decode_sRGB_float(float val);
|
||||
EXPCL_PANDA_PNMIMAGE CONSTEXPR unsigned char decode_sRGB_uchar(unsigned char val);
|
||||
EXPCL_PANDA_PNMIMAGE INLINE unsigned char decode_sRGB_uchar(float val);
|
||||
|
||||
EXPCL_PANDA_PNMIMAGE INLINE float encode_sRGB_float(unsigned char val);
|
||||
EXPCL_PANDA_PNMIMAGE INLINE float encode_sRGB_float(float val);
|
||||
EXPCL_PANDA_PNMIMAGE CONSTEXPR unsigned char encode_sRGB_uchar(unsigned char val);
|
||||
EXPCL_PANDA_PNMIMAGE INLINE unsigned char encode_sRGB_uchar(float val);
|
||||
|
||||
// These functions convert more than one component in one go,
|
||||
// which can be faster due to vectorization.
|
||||
EXPCL_PANDA_PNMIMAGE INLINE void encode_sRGB_uchar(const LColorf &from,
|
||||
xel &into);
|
||||
EXPCL_PANDA_PNMIMAGE INLINE void encode_sRGB_uchar(const LColorf &from,
|
||||
xel &into, xelval &into_alpha);
|
||||
|
||||
// Use these functions if you know that SSE2 support is available.
|
||||
// Otherwise, they will crash!
|
||||
EXPCL_PANDA_PNMIMAGE unsigned char encode_sRGB_uchar_sse2(float val);
|
||||
EXPCL_PANDA_PNMIMAGE void encode_sRGB_uchar_sse2(const LColorf &from,
|
||||
xel &into);
|
||||
EXPCL_PANDA_PNMIMAGE void encode_sRGB_uchar_sse2(const LColorf &from,
|
||||
xel &into, xelval &into_alpha);
|
||||
|
||||
// Use the following to find out if you can call any of the above.
|
||||
EXPCL_PANDA_PNMIMAGE bool has_sse2_sRGB_encode();
|
||||
|
||||
#include "convert_srgb.I"
|
||||
|
||||
#endif
|
151
panda/src/pnmimage/convert_srgb_sse2.cxx
Normal file
151
panda/src/pnmimage/convert_srgb_sse2.cxx
Normal file
@ -0,0 +1,151 @@
|
||||
// Filename: convert_srgb_sse2.cxx
|
||||
// Created by: rdb (13Nov14)
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// PANDA 3D SOFTWARE
|
||||
// Copyright (c) Carnegie Mellon University. All rights reserved.
|
||||
//
|
||||
// All use of this software is subject to the terms of the revised BSD
|
||||
// license. You should have received a copy of this license along
|
||||
// with this source code in a file named "LICENSE."
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
// This file should always be compiled with SSE2 support. These
|
||||
// functions will only be called when SSE2 support is detected at
|
||||
// run-time.
|
||||
|
||||
#include "convert_srgb.h"
|
||||
#include "luse.h"
|
||||
|
||||
#if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64)
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
// This is an SSE2-based approximation of the sRGB encode function, working
// on four floats at once and returning the results as 32-bit integers that
// are already scaled to the 0-255 range.  The lowest three lanes are
// sRGB-encoded; the fourth (alpha) lane is only clamped and multiplied
// with 255.
//
// It has a maximum error of around 0.001, which is by far small enough for
// a uchar.  It is also at least 10x as fast as the original; up to 40x when
// taking advantage of vectorization.
//
// Part of the code in this function is derived from:
// http://stackoverflow.com/a/6486630/2135754
static INLINE __m128i _encode_sRGB_sse2_mul255(__m128 val) {
  // Restrict the input to the 0-1 range.
  const __m128 zero = _mm_set1_ps(0.0f);
  const __m128 one = _mm_set1_ps(1.0f);
  val = _mm_min_ps(_mm_max_ps(val, zero), one);

  // Pre-multiply with a constant factor to adjust for the exponent bias,
  // then approximate the logarithm by reinterpreting the float bits as
  // integers and converting those integers to float.
  const __m128 biased = _mm_mul_ps(val, _mm_set1_ps(6.3307e18f));
  const __m128 approx_log = _mm_cvtepi32_ps(_mm_castps_si128(biased));

  // Multiplying the 'logarithm' applies the power...
  const __m128 scaled_log = _mm_mul_ps(approx_log, _mm_set1_ps(2.0f / 3.0f));

  // ...and the reverse conversion/reinterpretation undoes the 'logarithm'.
  const __m128 powered = _mm_castsi128_ps(_mm_cvtps_epi32(scaled_log));

  // Build an overestimate and an underestimate.
  const __m128 over = _mm_mul_ps(val, powered);
  const __m128 val_squared = _mm_mul_ps(val, val);
  const __m128 under = _mm_mul_ps(val_squared, _mm_rsqrt_ps(powered));

  // Average the two estimates, with a slight bias.
  __m128 curve = _mm_mul_ps(_mm_add_ps(over, under),
                            _mm_set1_ps(0.5286098f));

  // Take the square root twice, as x * rsqrt(x).  Note that this is faster
  // than the more expensive _mm_sqrt_ps instruction.
  curve = _mm_mul_ps(curve, _mm_rsqrt_ps(curve));
  curve = _mm_mul_ps(curve, _mm_rsqrt_ps(curve));

  // Bring it into the correct range.  These factors are determined not on
  // the basis of accuracy, but are chosen such that the decoder lookup table
  // produces an equivalent result for any value.
  curve = _mm_sub_ps(_mm_mul_ps(curve, _mm_set1_ps(269.122f)),
                     _mm_set1_ps(13.55f));

  // Compute the linear section.  This is also the path that the alpha
  // channel takes, so the alpha multiplier is 255 (alpha is not
  // sRGB-converted).  Note that _mm_set_ps lists the highest lane first.
  const __m128 linear_scale = _mm_set_ps(255.0f, 3294.6f, 3294.6f, 3294.6f);
  const __m128 linear = _mm_add_ps(_mm_mul_ps(val, linear_scale),
                                   _mm_set1_ps(0.5f));

  // Decide per lane which version to return.  The alpha threshold of 2.0
  // can never be reached by the clamped input, so the linear path is always
  // chosen for alpha.
  const __m128 threshold =
    _mm_set_ps(2.0f, 0.0031308f, 0.0031308f, 0.0031308f);
  const __m128 use_curve = _mm_cmpge_ps(val, threshold);

  // Non-branching select between the two results, then truncate to integer.
  const __m128 selected = _mm_or_ps(_mm_and_ps(use_curve, curve),
                                    _mm_andnot_ps(use_curve, linear));
  return _mm_cvttps_epi32(selected);
}
|
||||
|
||||
unsigned char
|
||||
encode_sRGB_uchar_sse2(float val) {
|
||||
// Running only a single component through this function is still
|
||||
// way faster than the equivalent non-SSE2 version.
|
||||
return (unsigned char)
|
||||
_mm_extract_epi32(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0);
|
||||
}
|
||||
|
||||
void
|
||||
encode_sRGB_uchar_sse2(const LColorf &color, xel &into) {
|
||||
#ifdef LINMATH_ALIGN
|
||||
__m128 vec = _mm_load_ps(color.get_data());
|
||||
#else
|
||||
__m128 vec = _mm_loadu_ps(color.get_data());
|
||||
#endif
|
||||
|
||||
__m128i vals = _encode_sRGB_sse2_mul255(vec);
|
||||
into.r = _mm_extract_epi32(vals, 0);
|
||||
into.g = _mm_extract_epi32(vals, 1);
|
||||
into.b = _mm_extract_epi32(vals, 2);
|
||||
}
|
||||
|
||||
void
|
||||
encode_sRGB_uchar_sse2(const LColorf &color, xel &into, xelval &into_alpha) {
|
||||
#ifdef LINMATH_ALIGN
|
||||
__m128 vec = _mm_load_ps(color.get_data());
|
||||
#else
|
||||
__m128 vec = _mm_loadu_ps(color.get_data());
|
||||
#endif
|
||||
|
||||
__m128i vals = _encode_sRGB_sse2_mul255(vec);
|
||||
into.r = _mm_extract_epi32(vals, 0);
|
||||
into.g = _mm_extract_epi32(vals, 1);
|
||||
into.b = _mm_extract_epi32(vals, 2);
|
||||
into_alpha = _mm_extract_epi32(vals, 3);
|
||||
}
|
||||
|
||||
#else
|
||||
// Somehow we're still compiling this without SSE2 support. We'll
|
||||
// still have to define these functions, but emit a warning that the
|
||||
// build system isn't configured properly.
|
||||
#warning convert_srgb_sse2.cxx is being compiled without SSE2 support!
|
||||
|
||||
unsigned char
|
||||
encode_sRGB_uchar_sse2(float val) {
|
||||
return encode_sRGB_uchar(val);
|
||||
}
|
||||
|
||||
void
|
||||
encode_sRGB_uchar_sse2(const LColorf &color, xel &into) {
|
||||
encode_sRGB_uchar(color, into);
|
||||
}
|
||||
|
||||
void
|
||||
encode_sRGB_uchar_sse2(const LColorf &color, xel &into, xelval &into_alpha) {
|
||||
encode_sRGB_uchar(color, into, into_alpha);
|
||||
}
|
||||
|
||||
#endif
|
@ -1,4 +1,5 @@
|
||||
#include "config_pnmimage.cxx"
|
||||
#include "convert_srgb.cxx"
|
||||
#include "pfmFile.cxx"
|
||||
#include "pnm-image-filter.cxx"
|
||||
#include "pnmbitio.cxx"
|
||||
|
@ -75,7 +75,7 @@ typedef unsigned int ZPOINT;
|
||||
((((unsigned int)(a) << 24) & 0xff000000) | (((unsigned int)(r) << 16) & 0xff0000) | (((unsigned int)(g) << 8) & 0xff00) | (unsigned int)(b))
|
||||
|
||||
#define SRGB_TO_PIXEL(r,g,b) \
|
||||
((encode_sRGB[(unsigned int)(r) >> 4] << 16) | (encode_sRGB10[(unsigned int)(g) >> 4] << 8) | (encode_sRGB[(unsigned int)(b) >> 4]))
|
||||
((encode_sRGB[(unsigned int)(r) >> 4] << 16) | (encode_sRGB[(unsigned int)(g) >> 4] << 8) | (encode_sRGB[(unsigned int)(b) >> 4]))
|
||||
#define SRGBA_TO_PIXEL(r,g,b,a) \
|
||||
((((unsigned int)(a) << 16) & 0xff000000) | (encode_sRGB[(unsigned int)(r) >> 4] << 16) | (encode_sRGB[(unsigned int)(g) >> 4] << 8) | (encode_sRGB[(unsigned int)(b) >> 4]))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user