From d11cafc374e4a5a7e60e9cbef563caba53cf1725 Mon Sep 17 00:00:00 2001 From: rdb Date: Mon, 2 Mar 2015 16:18:55 +0100 Subject: [PATCH] Replace SSE4 instruction with SSE2 instruction --- panda/src/pnmimage/convert_srgb_sse2.cxx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/panda/src/pnmimage/convert_srgb_sse2.cxx b/panda/src/pnmimage/convert_srgb_sse2.cxx index e83a00394d..8121eccc8d 100644 --- a/panda/src/pnmimage/convert_srgb_sse2.cxx +++ b/panda/src/pnmimage/convert_srgb_sse2.cxx @@ -95,7 +95,7 @@ encode_sRGB_uchar_sse2(float val) { // Running only a single component through this function is still // way faster than the equivalent non-SSE2 version. return (unsigned char) - _mm_extract_epi32(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0); + _mm_extract_epi16(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0); } void @@ -107,9 +107,9 @@ encode_sRGB_uchar_sse2(const LColorf &color, xel &into) { #endif __m128i vals = _encode_sRGB_sse2_mul255(vec); - into.r = _mm_extract_epi32(vals, 0); - into.g = _mm_extract_epi32(vals, 1); - into.b = _mm_extract_epi32(vals, 2); + into.r = _mm_extract_epi16(vals, 0); + into.g = _mm_extract_epi16(vals, 2); + into.b = _mm_extract_epi16(vals, 4); } void @@ -121,10 +121,10 @@ encode_sRGB_uchar_sse2(const LColorf &color, xel &into, xelval &into_alpha) { #endif __m128i vals = _encode_sRGB_sse2_mul255(vec); - into.r = _mm_extract_epi32(vals, 0); - into.g = _mm_extract_epi32(vals, 1); - into.b = _mm_extract_epi32(vals, 2); - into_alpha = _mm_extract_epi32(vals, 3); + into.r = _mm_extract_epi16(vals, 0); + into.g = _mm_extract_epi16(vals, 2); + into.b = _mm_extract_epi16(vals, 4); + into_alpha = _mm_extract_epi16(vals, 6); } #else