Replace SSE4 instruction with SSE2 instruction

This commit is contained in:
rdb 2015-03-02 16:18:55 +01:00
parent 97c24c9ef0
commit d11cafc374

View File

@@ -95,7 +95,7 @@ encode_sRGB_uchar_sse2(float val) {
// Running only a single component through this function is still
// way faster than the equivalent non-SSE2 version.
return (unsigned char)
-_mm_extract_epi32(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0);
+_mm_extract_epi16(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0);
}
void
@@ -107,9 +107,9 @@ encode_sRGB_uchar_sse2(const LColorf &color, xel &into) {
#endif
__m128i vals = _encode_sRGB_sse2_mul255(vec);
-into.r = _mm_extract_epi32(vals, 0);
-into.g = _mm_extract_epi32(vals, 1);
-into.b = _mm_extract_epi32(vals, 2);
+into.r = _mm_extract_epi16(vals, 0);
+into.g = _mm_extract_epi16(vals, 2);
+into.b = _mm_extract_epi16(vals, 4);
}
void
@@ -121,10 +121,10 @@ encode_sRGB_uchar_sse2(const LColorf &color, xel &into, xelval &into_alpha) {
#endif
__m128i vals = _encode_sRGB_sse2_mul255(vec);
-into.r = _mm_extract_epi32(vals, 0);
-into.g = _mm_extract_epi32(vals, 1);
-into.b = _mm_extract_epi32(vals, 2);
-into_alpha = _mm_extract_epi32(vals, 3);
+into.r = _mm_extract_epi16(vals, 0);
+into.g = _mm_extract_epi16(vals, 2);
+into.b = _mm_extract_epi16(vals, 4);
+into_alpha = _mm_extract_epi16(vals, 6);
}
#else