diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h | 701 |
1 files changed, 659 insertions, 42 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h index 4f7cba0..8dcdc71 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h @@ -1,4 +1,4 @@ -/*===---- avx512fintrin.h - AVX2 intrinsics --------------------------------=== +/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -47,7 +47,7 @@ typedef unsigned short __mmask16; #define _MM_FROUND_CUR_DIRECTION 0x04 /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) /* Create vectors with repeated elements */ @@ -57,6 +57,30 @@ _mm512_setzero_si512(void) return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_undefined_pd() +{ + return (__m512d)__builtin_ia32_undef512(); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_undefined() +{ + return (__m512)__builtin_ia32_undef512(); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_undefined_ps() +{ + return (__m512)__builtin_ia32_undef512(); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_undefined_epi32() +{ + return (__m512i)__builtin_ia32_undef512(); +} + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_set1_epi32(__mmask16 __M, int __A) { @@ -543,6 +567,66 @@ _mm512_max_ps(__m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) + +#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi32(__m512i __A, __m512i __B) @@ -606,6 +690,66 @@ _mm512_min_ps(__m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) + +#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epi32(__m512i __A, __m512i __B) @@ -728,18 +872,18 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_sqrt_pd(__m512d a) +_mm512_sqrt_pd(__m512d __a) { - return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a, + return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, (__v8df) _mm512_setzero_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_sqrt_ps(__m512 a) +_mm512_sqrt_ps(__m512 __a) { - return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a, + return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); @@ -765,7 +909,7 @@ _mm512_rsqrt14_ps(__m512 __A) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ss(__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), @@ -775,7 +919,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_sd(__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), @@ -802,7 +946,7 @@ _mm512_rcp14_ps(__m512 __A) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ss(__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, + return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), @@ -812,7 +956,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_sd(__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, + return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), @@ -873,6 +1017,489 @@ _mm512_abs_epi32(__m512i __A) (__mmask16) -1); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) + +#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \ + (__v8df) __W, (__mmask8) __U, __R); }) + +#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); }) + +#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); }) + +#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) __W, (__mmask16)__U, __R); }) + +#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); }) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) + +#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\ + (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) __W, (__mmask8) __U, __R); }) + +#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) + +#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) + +#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) __W, (__mmask16) __U, __R); }); + +#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) + +#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\ + (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) __W, (__mmask8) __U, __R); }) + +#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) + +#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) + +#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) __W, (__mmask16) __U, __R); }); + +#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) + +#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \ + (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ + (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) __W, (__mmask8) __U,__R); }) + +#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \ + (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ + (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\ + (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) + +#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) __W, (__mmask8) __U, __R); }) + +#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \ + (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \ + (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) + +#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) + +#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) __W, (__mmask16) __U, __R); }); + +#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \ + (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ + (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); + #define _mm512_roundscale_ps(A, B) __extension__ ({ \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \ -1, _MM_FROUND_CUR_DIRECTION); }) @@ -1706,17 +2333,15 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) /* Vector Extract */ #define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ - __m512d __A = (A); \ (__m256d) \ - __builtin_ia32_extractf64x4_mask((__v8df)__A, \ + __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \ (I), \ (__v4df)_mm256_setzero_si256(), \ (__mmask8) -1); }) #define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ - __m512 __A = (A); \ (__m128) \ - __builtin_ia32_extractf32x4_mask((__v16sf)__A, \ + __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \ (I), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8) -1); }) @@ -1850,18 +2475,18 @@ _mm512_cvtph_ps(__m256i __A) } static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_cvttps_epi32(__m512 a) +_mm512_cvttps_epi32(__m512 __a) { return (__m512i) - __builtin_ia32_cvttps2dq512_mask((__v16sf) a, + __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline __m256i __DEFAULT_FN_ATTRS -_mm512_cvttpd_epi32(__m512d a) +_mm512_cvttpd_epi32(__m512d __a) { - return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a, + return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, (__v8si)_mm256_setzero_si256(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); @@ -2405,51 +3030,43 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { } #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (p), \ (__mmask16)-1); }) #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (p), \ (__mmask16)-1); }) #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (p), \ (__mmask8)-1); }) #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (p), \ (__mmask8)-1); }) #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (p), \ (__mmask16)(m)); }) #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (p), \ (__mmask16)(m)); }) #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (p), \ (__mmask8)(m)); }) #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ - __m512i __a = (a); \ - __m512i __b = (b); \ - (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (p), \ (__mmask8)(m)); }) #undef __DEFAULT_FN_ATTRS |