summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2013-02-19 12:48:06 -0500
committer Erik Schnetter <schnetter@gmail.com> 2013-02-19 12:48:06 -0500
commit fc1ca4fc870172fd326baec7b7e845638b8463ff (patch)
tree bf50f804cd8c04b954afa346a72ebf6d88ce3f6e
parent 342c3ee75aad08468a4492cf60415b27762e504a (diff)
download vecmathlib-fc1ca4fc870172fd326baec7b7e845638b8463ff.zip
vecmathlib-fc1ca4fc870172fd326baec7b7e845638b8463ff.tar.gz
Improve lsr() implementation
-rw-r--r-- vec_float_avx.h 14
1 files changed, 2 insertions, 12 deletions
diff --git a/vec_float_avx.h b/vec_float_avx.h
index 728249a..536e573 100644
--- a/vec_float_avx.h
+++ b/vec_float_avx.h
@@ -259,18 +259,8 @@ namespace vecmathlib {
{
__m128i vlo = _mm256_castsi256_si128(v);
__m128i vhi = _mm256_extractf128_si256(v, 1);
- // There is no _mm_srai_epi32. To emulate it, add 0x80000000
- // before shifting, and subtract the shifted 0x80000000 after
- // shifting
- // Convert signed to unsiged
- vlo = _mm_add_epi32(vlo, _mm_set1_epi32(U(1) << (bits-1)));
- vhi = _mm_add_epi32(vhi, _mm_set1_epi32(U(1) << (bits-1)));
- // Shift
- vlo = _mm_srli_epi32(vlo, n);
- vhi = _mm_srli_epi32(vhi, n);
- // Undo conversion
- vlo = _mm_sub_epi32(vlo, _mm_set1_epi32(U(1) << (bits-n)));
- vhi = _mm_sub_epi32(vhi, _mm_set1_epi32(U(1) << (bits-n)));
+ vlo = _mm_srai_epi32(vlo, n);
+ vhi = _mm_srai_epi32(vhi, n);
return _mm256_insertf128_si256(_mm256_castsi128_si256(vlo), vhi, 1);
}
intvec operator<<(int_t n) const
OpenPOWER on IntegriCloud