diff options
-rw-r--r-- | vec_avx_fp16_16.h | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/vec_avx_fp16_16.h b/vec_avx_fp16_16.h
index ddc7f18..8dadf64 100644
--- a/vec_avx_fp16_16.h
+++ b/vec_avx_fp16_16.h
@@ -315,12 +315,12 @@ template <> struct intvec<fp16, 16> : floatprops<fp16> {
     // TODO: Use permute instead of shift/mask?
     __m256i mlo = _mm256_set1_epi32(U(0x0000ffff));
     __m256i vlo = v;
-    __m256i vhi = _mm256_andnot_si256(mlo, v;
-    __m256i clo = _mm256_and_si256(mlo, n);
-    __m256i chi = _mm256_and_si256(mlo, _mm256_srli_epi32(n, 16));
-    __m256i rlo = _mm256_and_si256(mlo, _mm256_sllv_epi32(vlo, clo));
-    __m256i rhi = _mm256_sllv_epi32(vhi, chi);
-    return _mm256_or_si256(rhi, rlo);
+    __m256i vhi = _mm256_andnot_si256(mlo, v);
+    __m256i clo = _mm256_and_si256(mlo, n);
+    __m256i chi = _mm256_and_si256(mlo, _mm256_srli_epi32(n, 16));
+    __m256i rlo = _mm256_and_si256(mlo, _mm256_sllv_epi32(vlo, clo));
+    __m256i rhi = _mm256_sllv_epi32(vhi, chi);
+    return _mm256_or_si256(rhi, rlo);
 #else
     intvec r;
     for (int i = 0; i < size; ++i) {