diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-02-19 12:48:06 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-02-19 12:48:06 -0500 |
commit | fc1ca4fc870172fd326baec7b7e845638b8463ff (patch) | |
tree | bf50f804cd8c04b954afa346a72ebf6d88ce3f6e | |
parent | 342c3ee75aad08468a4492cf60415b27762e504a (diff) | |
download | vecmathlib-fc1ca4fc870172fd326baec7b7e845638b8463ff.zip vecmathlib-fc1ca4fc870172fd326baec7b7e845638b8463ff.tar.gz |
Improve lsr() implementation
-rw-r--r-- | vec_float_avx.h | 14 |
1 file changed, 2 insertions, 12 deletions
diff --git a/vec_float_avx.h b/vec_float_avx.h index 728249a..536e573 100644 --- a/vec_float_avx.h +++ b/vec_float_avx.h @@ -259,18 +259,8 @@ namespace vecmathlib { { __m128i vlo = _mm256_castsi256_si128(v); __m128i vhi = _mm256_extractf128_si256(v, 1); - // There is no _mm_srai_epi32. To emulate it, add 0x80000000 - // before shifting, and subtract the shifted 0x80000000 after - // shifting - // Convert signed to unsiged - vlo = _mm_add_epi32(vlo, _mm_set1_epi32(U(1) << (bits-1))); - vhi = _mm_add_epi32(vhi, _mm_set1_epi32(U(1) << (bits-1))); - // Shift - vlo = _mm_srli_epi32(vlo, n); - vhi = _mm_srli_epi32(vhi, n); - // Undo conversion - vlo = _mm_sub_epi32(vlo, _mm_set1_epi32(U(1) << (bits-n))); - vhi = _mm_sub_epi32(vhi, _mm_set1_epi32(U(1) << (bits-n))); + vlo = _mm_srai_epi32(vlo, n); + vhi = _mm_srai_epi32(vhi, n); return _mm256_insertf128_si256(_mm256_castsi128_si256(vlo), vhi, 1); } intvec operator<<(int_t n) const |