diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/ammintrin.h | 68 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/avx2intrin.h | 240 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/bmiintrin.h | 6 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/emmintrin.h | 5 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/float.h | 2 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/fmaintrin.h | 229 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/immintrin.h | 26 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/stddef.h | 16 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/wmmintrin.h | 18 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/x86intrin.h | 10 | ||||
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/xopintrin.h | 411 |
11 files changed, 1019 insertions, 12 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h new file mode 100644 index 0000000..d87b9cd --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h @@ -0,0 +1,68 @@ +/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __AMMINTRIN_H +#define __AMMINTRIN_H + +#ifndef __SSE4A__ +#error "SSE4A instruction set not enabled" +#else + +#include <pmmintrin.h> + +#define _mm_extracti_si64(x, len, idx) \ + ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \ + (char)(len), (char)(idx))) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_extract_si64(__m128i __x, __m128i __y) +{ + return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); +} + +#define _mm_inserti_si64(x, y, len, idx) \ + ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \ + (__v2di)(__m128i)(y), \ + (char)(len), (char)(idx))) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_insert_si64(__m128i __x, __m128i __y) +{ + return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_sd(double *__p, __m128d __a) +{ + __builtin_ia32_movntsd(__p, (__v2df)__a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_ss(float *__p, __m128 __a) +{ + __builtin_ia32_movntss(__p, (__v4sf)__a); +} + +#endif /* __SSE4A__ */ + +#endif /* __AMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h index 884c46d..2c53aed 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h @@ -959,3 +959,243 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); } + +#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m128d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m128d __mask = (mask); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \ + (__v4si)__i, (__v2df)__mask, (s)); }) + +#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m256d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m256d __mask = (mask); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \ + (__v4si)__i, (__v4df)__mask, (s)); }) + +#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m128d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m128d __mask = (mask); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \ + (__v2di)__i, (__v2df)__mask, (s)); }) + +#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m256d __a = (a); \ + double const *__m = (m); \ + __m256i __i = (i); \ + __m256d __mask = (mask); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \ + (__v4di)__i, (__v4df)__mask, (s)); }) + +#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m128i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \ + (__v4si)__i, (__v4sf)__mask, (s)); }) + +#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m256 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m256 __mask = (mask); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \ + (__v8si)__i, (__v8sf)__mask, (s)); }) + +#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m128i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \ + (__v2di)__i, (__v4sf)__mask, (s)); }) + +#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \ + (__v4di)__i, (__v4sf)__mask, (s)); }) + +#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \ + (__v4si)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \ + (__v8si)__i, (__v8si)__mask, (s)); }) + +#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \ + (__v2di)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \ + (__v4di)__i, (__v4si)__mask, (s)); }) + +#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \ + (__v4si)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4si)__i, (__v4di)__mask, (s)); }) + +#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \ + (__v2di)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4di)__i, (__v4di)__mask, (s)); }) + +#define _mm_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v4si)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4si)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v2di)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m256i __i = (i); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4di)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4si)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \ + (const __v8sf *)__m, (__v8si)__i, \ + (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v2di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4si)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \ + (const __v8si *)__m, (__v8si)__i, \ + (__v8si)_mm256_set1_epi32(-1), (s)); }) + +#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v2di)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4di)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \ + (const __v2di *)__m, (__v4si)__i, \ + (__v2di)_mm_set1_epi64x(-1), (s)); }) + +#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \ + (const __v4di *)__m, (__v4si)__i, \ + (__v4di)_mm256_set1_epi64x(-1), (s)); }) + +#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ + (const __v2di *)__m, (__v2di)__i, \ + (__v2di)_mm_set1_epi64x(-1), (s)); }) + +#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ + (const __v4di *)__m, (__v4di)__i, \ + (__v4di)_mm256_set1_epi64x(-1), (s)); }) diff --git a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h index 2f7db73..8cb00f5 100644 --- a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h @@ -33,7 +33,7 @@ #define __BMIINTRIN_H static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) -__tzcnt16(unsigned short __X) +__tzcnt_u16(unsigned short __X) { return __builtin_ctzs(__X); } @@ -69,7 +69,7 @@ __blsr_u32(unsigned int __X) } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -__tzcnt32(unsigned int __X) +__tzcnt_u32(unsigned int __X) { return __builtin_ctz(__X); } @@ -106,7 +106,7 @@ __blsr_u64(unsigned long long __X) } static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) -__tzcnt64(unsigned long long __X) +__tzcnt_u64(unsigned long long __X) { return __builtin_ctzll(__X); } diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h index e10b77d..91395ed 100644 --- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h @@ -1186,7 +1186,10 @@ _mm_maskmoveu_si128(__m128i d, __m128i n, char *p) static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_storel_epi64(__m128i *p, __m128i a) { - __builtin_ia32_storelv4si((__v2si *)p, a); + struct __mm_storel_epi64_struct { + long long u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storel_epi64_struct*)p)->u = a[0]; } static __inline__ void __attribute__((__always_inline__, __nodebug__)) diff --git a/contrib/llvm/tools/clang/lib/Headers/float.h b/contrib/llvm/tools/clang/lib/Headers/float.h index 65b517d..2cb13d3 100644 --- a/contrib/llvm/tools/clang/lib/Headers/float.h +++ b/contrib/llvm/tools/clang/lib/Headers/float.h @@ -28,7 +28,7 @@ * additional definitions provided for Windows. * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx */ -#if defined(__MINGW32__) && \ +#if (defined(__MINGW32__) || defined(_MSC_VER)) && \ defined(__has_include_next) && __has_include_next(<float.h>) # include_next <float.h> diff --git a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h new file mode 100644 index 0000000..6bfd5a8 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h @@ -0,0 +1,229 @@ +/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __FMAINTRIN_H +#define __FMAINTRIN_H + +#ifndef __FMA__ +# error "FMA instruction set is not enabled" +#else + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); +} + +#endif /* __FMA__ */ + +#endif /* __FMAINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h index 1605525..15b65f3 100644 --- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h @@ -72,4 +72,30 @@ #include <lzcntintrin.h> #endif +#ifdef __FMA__ +#include <fmaintrin.h> +#endif + +#ifdef __RDRND__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand16_step(unsigned short *__p) +{ + return __builtin_ia32_rdrand16_step(__p); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand32_step(unsigned int *__p) +{ + return __builtin_ia32_rdrand32_step(__p); +} + +#ifdef __x86_64__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand64_step(unsigned long long *__p) +{ + return __builtin_ia32_rdrand64_step(__p); +} +#endif +#endif /* __RDRND__ */ + #endif /* __IMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/stddef.h b/contrib/llvm/tools/clang/lib/Headers/stddef.h index 9e87ee89..eb919b5 100644 --- a/contrib/llvm/tools/clang/lib/Headers/stddef.h +++ b/contrib/llvm/tools/clang/lib/Headers/stddef.h @@ -43,10 +43,20 @@ typedef __WCHAR_TYPE__ wchar_t; #undef NULL #ifdef __cplusplus -#undef __null // VC++ hack. -#define NULL __null +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# define NULL 0 +# endif #else -#define NULL ((void*)0) +# define NULL ((void*)0) +#endif + +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif #endif #define offsetof(t, d) __builtin_offsetof(t, d) diff --git a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h index 8f58850..dca896f 100644 --- a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h @@ -24,11 +24,13 @@ #ifndef _WMMINTRIN_H #define _WMMINTRIN_H -#if !defined (__AES__) -# error "AES instructions not enabled" +#include <emmintrin.h> + +#if !defined (__AES__) && !defined (__PCLMUL__) +# error "AES/PCLMUL instructions not enabled" #else -#include <xmmintrin.h> +#ifdef __AES__ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_aesenc_si128(__m128i __V, __m128i __R) @@ -64,4 +66,14 @@ _mm_aesimc_si128(__m128i __V) __builtin_ia32_aeskeygenassist128((C), (R)) #endif /* __AES__ */ + +#ifdef __PCLMUL__ + +#define _mm_clmulepi64_si128(__X, __Y, __I) \ + ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \ + (__v2di)(__m128i)(__Y), (char)(__I))) + +#endif /* __PCLMUL__ */ + +#endif /* __AES__ || __PCLMUL__ */ #endif /* _WMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h index f5e4d88..556cd01 100644 --- a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h @@ -46,10 +46,18 @@ #include <popcntintrin.h> #endif +#ifdef __SSE4A__ +#include <ammintrin.h> +#endif + #ifdef __FMA4__ #include <fma4intrin.h> #endif -// FIXME: SSE4A, XOP, LWP, ABM +#ifdef __XOP__ +#include <xopintrin.h> +#endif + +// FIXME: LWP #endif /* __X86INTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h new file mode 100644 index 0000000..d107be4 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h @@ -0,0 +1,411 @@ +/*===---- xopintrin.h - FMA4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __XOPINTRIN_H +#define __XOPINTRIN_H + +#ifndef __XOP__ +# error "XOP instruction set is not enabled" +#else + +#include <fma4intrin.h> + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) +{ + return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); +} + +#define _mm_roti_epi8(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) + +#define _mm_roti_epi16(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) + +#define _mm_roti_epi32(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) + +#define _mm_roti_epi64(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); +} + +#define _mm_com_epu8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epu16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epu32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epu64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _mm_com_epi8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epi16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epi32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epi64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + __m128i __C = (C); \ + (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ + (__v2di)__C, (I)); }) + +#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m256d __X = (X); \ + __m256d __Y = (Y); \ + __m256i __C = (C); \ + (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ + (__v4di)__C, (I)); }) + +#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + __m128i __C = (C); \ + (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ + (__v4si)__C, (I)); }) + +#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m256 __X = (X); \ + __m256 __Y = (Y); \ + __m256i __C = (C); \ + (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ + (__v8si)__C, (I)); }) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_ss(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_sd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_ps(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_pd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_frcz_ps(__m256 __A) +{ + return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_frcz_pd(__m256d __A) +{ + return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); +} + +#endif /* __XOP__ */ + +#endif /* __XOPINTRIN_H */ |