Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers')
23 files changed, 2528 insertions, 187 deletions
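The headline change in the Intrin.h hunk below is that _umul128 and __umulh stop being bare declarations and become real inline implementations built on unsigned __int128. A minimal standalone sketch of the same split, assuming a 64-bit clang or gcc target where unsigned __int128 is available (the names umul128_sketch, a, b, and hi are illustrative, not part of the diff):

#include <stdio.h>

/* Illustrative only: form the full 128-bit product of two 64-bit values,
   returning the low half and storing the high half through an out pointer,
   the same split the new Intrin.h _umul128 performs. */
static unsigned long long
umul128_sketch(unsigned long long a, unsigned long long b,
               unsigned long long *hi) {
  unsigned __int128 p = (unsigned __int128)a * (unsigned __int128)b;
  *hi = (unsigned long long)(p >> 64); /* high 64 bits, what __umulh returns */
  return (unsigned long long)p;        /* low 64 bits */
}

int main(void) {
  unsigned long long hi;
  unsigned long long lo = umul128_sketch(0xFFFFFFFFFFFFFFFFULL, 2ULL, &hi);
  printf("hi=%llx lo=%llx\n", hi, lo); /* prints hi=1 lo=fffffffffffffffe */
  return 0;
}

Letting the compiler form the full 128-bit product this way typically lowers to a single mulq on x86-64, so no inline assembly is needed.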
diff --git a/contrib/llvm/tools/clang/lib/Headers/Intrin.h b/contrib/llvm/tools/clang/lib/Headers/Intrin.h index 13e105e..84bc430 100644 --- a/contrib/llvm/tools/clang/lib/Headers/Intrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/Intrin.h @@ -160,9 +160,6 @@ void __writefsword(unsigned long, unsigned short); void __writemsr(unsigned long, unsigned __int64); static __inline__ void *_AddressOfReturnAddress(void); -unsigned int _andn_u32(unsigned int, unsigned int); -unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); -unsigned int _bextri_u32(unsigned int, unsigned int); static __inline__ unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); static __inline__ @@ -175,20 +172,9 @@ static __inline__ unsigned char _bittestandreset(long *, long); static __inline__ unsigned char _bittestandset(long *, long); -unsigned int _blcfill_u32(unsigned int); -unsigned int _blci_u32(unsigned int); -unsigned int _blcic_u32(unsigned int); -unsigned int _blcmsk_u32(unsigned int); -unsigned int _blcs_u32(unsigned int); -unsigned int _blsfill_u32(unsigned int); -unsigned int _blsi_u32(unsigned int); -unsigned int _blsic_u32(unsigned int); -unsigned int _blsmsk_u32(unsigned int); -unsigned int _blsr_u32(unsigned int); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); unsigned long __cdecl _byteswap_ulong(unsigned long); unsigned short __cdecl _byteswap_ushort(unsigned short); -unsigned _bzhi_u32(unsigned int, unsigned int); void __cdecl _disable(void); void __cdecl _enable(void); void __cdecl _fxrstor(void const *); @@ -266,7 +252,6 @@ unsigned long __cdecl _lrotl(unsigned long, int); static __inline__ unsigned long __cdecl _lrotr(unsigned long, int); static __inline__ -unsigned int _lzcnt_u32(unsigned int); static __inline__ void _ReadBarrier(void); static __inline__ @@ -274,8 +259,6 @@ void _ReadWriteBarrier(void); static __inline__ void *_ReturnAddress(void); unsigned int _rorx_u32(unsigned int, const unsigned int); -int __cdecl _rdrand16_step(unsigned short *); -int __cdecl _rdrand32_step(unsigned int *); static __inline__ unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); static __inline__ @@ -301,12 +284,8 @@ unsigned int _shrx_u32(unsigned int, unsigned int); void _Store_HLERelease(long volatile *, long); void _Store64_HLERelease(__int64 volatile *, __int64); void _StorePointer_HLERelease(void *volatile *, void *); -unsigned int _t1mskc_u32(unsigned int); -unsigned int _tzcnt_u32(unsigned int); -unsigned int _tzmsk_u32(unsigned int); static __inline__ void _WriteBarrier(void); -void _xabort(const unsigned int imm); unsigned __int32 xbegin(void); void _xend(void); static __inline__ @@ -315,7 +294,6 @@ void __cdecl _xrstor(void const *, unsigned __int64); void __cdecl _xsave(void *, unsigned __int64); void __cdecl _xsaveopt(void *, unsigned __int64); void __cdecl _xsetbv(unsigned int, unsigned __int64); -unsigned char _xtest(void); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ #ifdef __x86_64__ @@ -352,7 +330,6 @@ unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned char _Shift); static __inline__ void __stosq(unsigned __int64 *, unsigned __int64, size_t); -unsigned __int64 __umulh(unsigned __int64, unsigned __int64); unsigned char __vmx_on(unsigned __int64 *); unsigned char __vmx_vmclear(unsigned __int64 *); unsigned char __vmx_vmlaunch(void); @@ -364,9 +341,6 @@ void __writegsbyte(unsigned long, unsigned char); void __writegsdword(unsigned long, unsigned long); void __writegsqword(unsigned long, unsigned __int64); void __writegsword(unsigned long, unsigned short); -unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64); -unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int); -unsigned __int64 _bextri_u64(unsigned __int64, unsigned int); static __inline__ unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); static __inline__ @@ -379,18 +353,7 @@ static __inline__ unsigned char _bittestandreset64(__int64 *, __int64); static __inline__ unsigned char _bittestandset64(__int64 *, __int64); -unsigned __int64 _blcfill_u64(unsigned __int64); -unsigned __int64 _blci_u64(unsigned __int64); -unsigned __int64 _blcic_u64(unsigned __int64); -unsigned __int64 _blcmsk_u64(unsigned __int64); -unsigned __int64 _blcs_u64(unsigned __int64); -unsigned __int64 _blsfill_u64(unsigned __int64); -unsigned __int64 _blsi_u64(unsigned __int64); -unsigned __int64 _blsic_u64(unsigned __int64); -unsigned __int64 _blsmsk_u64(unsigned __int64); -unsigned __int64 _blsr_u64(unsigned __int64); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); -unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int); void __cdecl _fxrstor64(void const *); void __cdecl _fxsave64(void *); long _InterlockedAnd_np(long volatile *_Value, long _Mask); @@ -444,29 +407,33 @@ __int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedXor8_np(char volatile *_Value, char _Mask); static __inline__ -unsigned __int64 _lzcnt_u64(unsigned __int64); __int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, __int64 *_HighProduct); -unsigned int __cdecl _readfsbase_u32(void); -unsigned __int64 __cdecl _readfsbase_u64(void); -unsigned int __cdecl _readgsbase_u32(void); -unsigned __int64 __cdecl _readgsbase_u64(void); unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); __int64 _sarx_i64(__int64, unsigned int); #if __STDC_HOSTED__ int __cdecl _setjmpex(jmp_buf); #endif unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); -unsigned __int64 shrx_u64(unsigned __int64, unsigned int); -unsigned __int64 _tzcnt_u64(unsigned __int64); -unsigned __int64 _tzmsk_u64(unsigned __int64); -unsigned __int64 _umul128(unsigned __int64 _Multiplier, - unsigned __int64 _Multiplicand, - unsigned __int64 *_HighProduct); -void __cdecl _writefsbase_u32(unsigned int); -void _cdecl _writefsbase_u64(unsigned __int64); -void __cdecl _writegsbase_u32(unsigned int); -void __cdecl _writegsbase_u64(unsigned __int64); +unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); +/* + * Multiply two 64-bit integers to obtain a 128-bit product. + * The low half is returned directly and the high half is stored through an out parameter.
+ */ +static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, + unsigned __int64 *_HighProduct) { + unsigned __int128 _FullProduct = + (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; + *_HighProduct = _FullProduct >> 64; + return _FullProduct; +} +static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +__umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) { + unsigned __int128 _FullProduct = + (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; + return _FullProduct >> 64; +} void __cdecl _xrstor64(void const *, unsigned __int64); void __cdecl _xsave64(void *, unsigned __int64); void __cdecl _xsaveopt64(void *, unsigned __int64); @@ -545,12 +512,6 @@ _BitScanReverse(unsigned long *_Index, unsigned long _Mask) { *_Index = 31 - __builtin_clzl(_Mask); return 1; } -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) -_lzcnt_u32(unsigned int a) { - if (!a) - return 32; - return __builtin_clzl(a); -} static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) __popcnt16(unsigned short value) { return __builtin_popcount((int)value); @@ -608,13 +569,6 @@ _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { *_Index = 63 - __builtin_clzll(_Mask); return 1; } -static -__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) -_lzcnt_u64(unsigned __int64 a) { - if (!a) - return 64; - return __builtin_clzll(a); -} static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) __popcnt64(unsigned __int64 value) { @@ -861,10 +815,6 @@ static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) __readfsbyte(unsigned long __offset) { return *__ptr_to_addr_space(257, unsigned char, __offset); } -static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) -__readfsdword(unsigned long __offset) { - return *__ptr_to_addr_space(257, unsigned long, __offset); -} static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) __readfsqword(unsigned long __offset) { return *__ptr_to_addr_space(257, unsigned __int64, __offset); diff --git a/contrib/llvm/tools/clang/lib/Headers/__stddef_max_align_t.h b/contrib/llvm/tools/clang/lib/Headers/__stddef_max_align_t.h new file mode 100644 index 0000000..a06f412 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/__stddef_max_align_t.h @@ -0,0 +1,40 @@ +/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---=== + * + * Copyright (c) 2014 Chandler Carruth + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_MAX_ALIGN_T_DEFINED +#define __CLANG_MAX_ALIGN_T_DEFINED + +#ifndef _MSC_VER +typedef struct { + long long __clang_max_align_nonce1 + __attribute__((__aligned__(__alignof__(long long)))); + long double __clang_max_align_nonce2 + __attribute__((__aligned__(__alignof__(long double)))); +} max_align_t; +#else +typedef double max_align_t; +#endif + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/adxintrin.h b/contrib/llvm/tools/clang/lib/Headers/adxintrin.h new file mode 100644 index 0000000..9db8bcb --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/adxintrin.h @@ -0,0 +1,83 @@ +/*===---- adxintrin.h - ADX intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <adxintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef __ADXINTRIN_H +#define __ADXINTRIN_H + +/* Intrinsics that are available only if __ADX__ defined */ +#ifdef __ADX__ +static __inline unsigned char __attribute__((__always_inline__, __nodebug__)) +_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y, + unsigned int *__p) +{ + return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); +} + +#ifdef __x86_64__ +static __inline unsigned char __attribute__((__always_inline__, __nodebug__)) +_addcarryx_u64(unsigned char __cf, unsigned long long __x, + unsigned long long __y, unsigned long long *__p) +{ + return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); +} +#endif +#endif + +/* Intrinsics that are also available if __ADX__ undefined */ +static __inline unsigned char __attribute__((__always_inline__, __nodebug__)) +_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y, + unsigned int *__p) +{ + return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p); +} + +#ifdef __x86_64__ +static __inline unsigned char __attribute__((__always_inline__, __nodebug__)) +_addcarry_u64(unsigned char __cf, unsigned long long __x, + unsigned long long __y, unsigned long long *__p) +{ + return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p); +} +#endif + +static __inline unsigned char __attribute__((__always_inline__, __nodebug__)) +_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y, + unsigned int *__p) +{ + return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p); +} + +#ifdef __x86_64__ +static __inline unsigned char __attribute__((__always_inline__, __nodebug__)) +_subborrow_u64(unsigned char __cf, unsigned long long __x, + unsigned long long __y, unsigned long long *__p) +{ + return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p); +} +#endif + +#endif /* __ADXINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h index f9fc64a..0ac0841 100644 --- a/contrib/llvm/tools/clang/lib/Headers/altivec.h +++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h @@ -1623,6 +1623,21 @@ vec_vctuxs(vector float __a, int __b) return __builtin_altivec_vctuxs(__a, __b); } +/* vec_div */ +#ifdef __VSX__ +static vector float __ATTRS_o_ai +vec_div(vector float __a, vector float __b) +{ + return __builtin_vsx_xvdivsp(__a, __b); +} + +static vector double __ATTRS_o_ai +vec_div(vector double __a, vector double __b) +{ + return __builtin_vsx_xvdivdp(__a, __b); +} +#endif + /* vec_dss */ static void __attribute__((__always_inline__)) @@ -2253,91 +2268,273 @@ vec_vlogefp(vector float __a) /* vec_lvsl */ +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const signed char *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const signed char *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const unsigned char *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, 
mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const short *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const short *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const unsigned short *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const unsigned short *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const int *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const int *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const unsigned int *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const unsigned int *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsl(int __a, const float *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsl(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const float *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } +#endif /* vec_lvsr */ +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const signed char *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const signed char *__b) { return 
(vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const unsigned char *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const short *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const short *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const unsigned short *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const unsigned short *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const int *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const int *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const unsigned int *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const unsigned int *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif +#ifdef __LITTLE_ENDIAN__ +static vector unsigned char __ATTRS_o_ai +__attribute__((deprecated("use assignment for unaligned little endian \ +loads/stores"))) +vec_lvsr(int __a, const float *__b) +{ + vector unsigned char mask = + (vector unsigned char)__builtin_altivec_lvsr(__a, __b); + vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + return vec_perm(mask, mask, reverse); +} +#else static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } +#endif /* vec_madd */ @@ -2485,8 +2682,20 @@ vec_max(vector unsigned int 
__a, vector bool int __b) static vector float __ATTRS_o_ai vec_max(vector float __a, vector float __b) { +#ifdef __VSX__ + return __builtin_vsx_xvmaxsp(__a, __b); +#else return __builtin_altivec_vmaxfp(__a, __b); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_max(vector double __a, vector double __b) +{ + return __builtin_vsx_xvmaxdp(__a, __b); } +#endif /* vec_vmaxsb */ @@ -2613,7 +2822,11 @@ vec_vmaxuw(vector unsigned int __a, vector bool int __b) static vector float __attribute__((__always_inline__)) vec_vmaxfp(vector float __a, vector float __b) { +#ifdef __VSX__ + return __builtin_vsx_xvmaxsp(__a, __b); +#else return __builtin_altivec_vmaxfp(__a, __b); +#endif } /* vec_mergeh */ @@ -3117,9 +3330,21 @@ vec_min(vector unsigned int __a, vector bool int __b) static vector float __ATTRS_o_ai vec_min(vector float __a, vector float __b) { +#ifdef __VSX__ + return __builtin_vsx_xvminsp(__a, __b); +#else return __builtin_altivec_vminfp(__a, __b); +#endif } +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_min(vector double __a, vector double __b) +{ + return __builtin_vsx_xvmindp(__a, __b); +} +#endif + /* vec_vminsb */ static vector signed char __ATTRS_o_ai @@ -3245,7 +3470,11 @@ vec_vminuw(vector unsigned int __a, vector bool int __b) static vector float __attribute__((__always_inline__)) vec_vminfp(vector float __a, vector float __b) { +#ifdef __VSX__ + return __builtin_vsx_xvminsp(__a, __b); +#else return __builtin_altivec_vminfp(__a, __b); +#endif } /* vec_mladd */ @@ -4506,7 +4735,7 @@ vec_vpkswus(vector unsigned int __a, vector unsigned int __b) // in that the vec_xor can be recognized as a vec_nor (and for P8 and // later, possibly a vec_nand). -vector signed char __ATTRS_o_ai +static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4521,7 +4750,7 @@ vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __ #endif } -vector unsigned char __ATTRS_o_ai +static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) @@ -4538,7 +4767,7 @@ vec_perm(vector unsigned char __a, #endif } -vector bool char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4553,7 +4782,7 @@ vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) #endif } -vector short __ATTRS_o_ai +static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4568,7 +4797,7 @@ vec_perm(vector short __a, vector short __b, vector unsigned char __c) #endif } -vector unsigned short __ATTRS_o_ai +static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a, vector unsigned short __b, vector unsigned char __c) @@ -4585,7 +4814,7 @@ vec_perm(vector unsigned short __a, #endif } -vector bool short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4600,7 +4829,7 @@ vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c) #endif } -vector pixel __ATTRS_o_ai +static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4615,7 +4844,7 @@ vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c) 
#endif } -vector int __ATTRS_o_ai +static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4628,7 +4857,7 @@ vec_perm(vector int __a, vector int __b, vector unsigned char __c) #endif } -vector unsigned int __ATTRS_o_ai +static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4643,7 +4872,7 @@ vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char #endif } -vector bool int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4658,7 +4887,7 @@ vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) #endif } -vector float __ATTRS_o_ai +static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4673,6 +4902,52 @@ vec_perm(vector float __a, vector float __b, vector unsigned char __c) #endif } +#ifdef __VSX__ +static vector long long __ATTRS_o_ai +vec_perm(vector long long __a, vector long long __b, vector unsigned char __c) +{ +#ifdef __LITTLE_ENDIAN__ + vector unsigned char __d = {255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255}; + __d = vec_xor(__c, __d); + return (vector long long)__builtin_altivec_vperm_4si(__b, __a, __d); +#else + return (vector long long)__builtin_altivec_vperm_4si(__a, __b, __c); +#endif +} + +static vector unsigned long long __ATTRS_o_ai +vec_perm(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned char __c) +{ +#ifdef __LITTLE_ENDIAN__ + vector unsigned char __d = {255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255}; + __d = vec_xor(__c, __d); + return (vector unsigned long long) + __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); +#else + return (vector unsigned long long) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +#endif +} + +static vector double __ATTRS_o_ai +vec_perm(vector double __a, vector double __b, vector unsigned char __c) +{ +#ifdef __LITTLE_ENDIAN__ + vector unsigned char __d = {255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255}; + __d = vec_xor(__c, __d); + return (vector double) + __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); +#else + return (vector double) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +#endif +} +#endif + /* vec_vperm */ static vector signed char __ATTRS_o_ai @@ -4745,6 +5020,27 @@ vec_vperm(vector float __a, vector float __b, vector unsigned char __c) return vec_perm(__a, __b, __c); } +#ifdef __VSX__ +static vector long long __ATTRS_o_ai +vec_vperm(vector long long __a, vector long long __b, vector unsigned char __c) +{ + return vec_perm(__a, __b, __c); +} + +static vector unsigned long long __ATTRS_o_ai +vec_vperm(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned char __c) +{ + return vec_perm(__a, __b, __c); +} + +static vector double __ATTRS_o_ai +vec_vperm(vector double __a, vector double __b, vector unsigned char __c) +{ + return vec_perm(__a, __b, __c); +} +#endif + /* vec_re */ static vector float __attribute__((__always_inline__)) @@ -8368,11 +8664,11 @@ vec_sum2s(vector int __a, vector int __b) #ifdef __LITTLE_ENDIAN__ vector int __c = (vector signed int) vec_perm(__b, __b, (vector unsigned char) - 
(4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); __c = __builtin_altivec_vsum2sws(__a, __c); return (vector signed int) vec_perm(__c, __c, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); #else return __builtin_altivec_vsum2sws(__a, __b); #endif @@ -8386,11 +8682,11 @@ vec_vsum2sws(vector int __a, vector int __b) #ifdef __LITTLE_ENDIAN__ vector int __c = (vector signed int) vec_perm(__b, __b, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); __c = __builtin_altivec_vsum2sws(__a, __c); return (vector signed int) vec_perm(__c, __c, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); #else return __builtin_altivec_vsum2sws(__a, __b); #endif @@ -8661,6 +8957,91 @@ vec_vupklsh(vector pixel __a) #endif } +/* vec_vsx_ld */ + +#ifdef __VSX__ + +static vector signed int __ATTRS_o_ai +vec_vsx_ld(int __a, const vector signed int *__b) +{ + return (vector signed int)__builtin_vsx_lxvw4x(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsx_ld(int __a, const vector unsigned int *__b) +{ + return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_vsx_ld(int __a, const vector float *__b) +{ + return (vector float)__builtin_vsx_lxvw4x(__a, __b); +} + +static vector signed long long __ATTRS_o_ai +vec_vsx_ld(int __a, const vector signed long long *__b) +{ + return (vector signed long long)__builtin_vsx_lxvd2x(__a, __b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_vsx_ld(int __a, const vector unsigned long long *__b) +{ + return (vector unsigned long long)__builtin_vsx_lxvd2x(__a, __b); +} + +static vector double __ATTRS_o_ai +vec_vsx_ld(int __a, const vector double *__b) +{ + return (vector double)__builtin_vsx_lxvd2x(__a, __b); +} + +#endif + +/* vec_vsx_st */ + +#ifdef __VSX__ + +static void __ATTRS_o_ai +vec_vsx_st(vector signed int __a, int __b, vector signed int *__c) +{ + __builtin_vsx_stxvw4x((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_vsx_st(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + __builtin_vsx_stxvw4x((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_vsx_st(vector float __a, int __b, vector float *__c) +{ + __builtin_vsx_stxvw4x((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_vsx_st(vector signed long long __a, int __b, vector signed long long *__c) +{ + __builtin_vsx_stxvd2x((vector double)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_vsx_st(vector unsigned long long __a, int __b, + vector unsigned long long *__c) +{ + __builtin_vsx_stxvd2x((vector double)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_vsx_st(vector double __a, int __b, vector double *__c) +{ + __builtin_vsx_stxvd2x((vector double)__a, __b, __c); +} + +#endif + /* vec_xor */ #define __builtin_altivec_vxor vec_xor diff --git a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h index a0fd689..814df2c 100644 --- a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h +++ b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h @@ -66,6 +66,41 @@ static __inline__ void __attribute__((always_inline, nodebug)) __yield(void) { } #endif +#if __ARM_32BIT_STATE +#define __dbg(t) __builtin_arm_dbg(t) +#endif + +/* 8.5 Swap */ +static __inline__ uint32_t __attribute__((always_inline, nodebug)) + __swp(uint32_t x, volatile uint32_t *p) { + 
uint32_t v; + do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p)); + return v; +} + +/* 8.6 Memory prefetch intrinsics */ +/* 8.6.1 Data prefetch */ +#define __pld(addr) __pldx(0, 0, 0, addr) + +#if __ARM_32BIT_STATE +#define __pldx(access_kind, cache_level, retention_policy, addr) \ + __builtin_arm_prefetch(addr, access_kind, 1) +#else +#define __pldx(access_kind, cache_level, retention_policy, addr) \ + __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) +#endif + +/* 8.6.2 Instruction prefetch */ +#define __pli(addr) __plix(0, 0, addr) + +#if __ARM_32BIT_STATE +#define __plix(cache_level, retention_policy, addr) \ + __builtin_arm_prefetch(addr, 0, 0) +#else +#define __plix(cache_level, retention_policy, addr) \ + __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0) +#endif + /* 8.7 NOP */ static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) { __builtin_arm_nop(); @@ -73,6 +108,32 @@ static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) { /* 9 DATA-PROCESSING INTRINSICS */ /* 9.2 Miscellaneous data-processing intrinsics */ +/* ROR */ +static __inline__ uint32_t __attribute__((always_inline, nodebug)) + __ror(uint32_t x, uint32_t y) { + y %= 32; + if (y == 0) return x; + return (x >> y) | (x << (32 - y)); +} + +static __inline__ uint64_t __attribute__((always_inline, nodebug)) + __rorll(uint64_t x, uint32_t y) { + y %= 64; + if (y == 0) return x; + return (x >> y) | (x << (64 - y)); +} + +static __inline__ unsigned long __attribute__((always_inline, nodebug)) + __rorl(unsigned long x, uint32_t y) { +#if __SIZEOF_LONG__ == 4 + return __ror(x, y); +#else + return __rorll(x, y); +#endif +} + + +/* CLZ */ static __inline__ uint32_t __attribute__((always_inline, nodebug)) __clz(uint32_t t) { return __builtin_clz(t); @@ -85,13 +146,10 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug)) static __inline__ uint64_t __attribute__((always_inline, nodebug)) __clzll(uint64_t t) { -#if __SIZEOF_LONG_LONG__ == 8 return __builtin_clzll(t); -#else - return __builtin_clzl(t); -#endif } +/* REV */ static __inline__ uint32_t __attribute__((always_inline, nodebug)) __rev(uint32_t t) { return __builtin_bswap32(t); @@ -111,6 +169,53 @@ static __inline__ uint64_t __attribute__((always_inline, nodebug)) return __builtin_bswap64(t); } +/* REV16 */ +static __inline__ uint32_t __attribute__((always_inline, nodebug)) + __rev16(uint32_t t) { + return __ror(__rev(t), 16); +} + +static __inline__ uint64_t __attribute__((always_inline, nodebug)) + __rev16ll(uint64_t t) { + /* Byte-swap each 16-bit halfword: apply the 32-bit __rev16 to each half. */ + return (((uint64_t)__rev16((uint32_t)(t >> 32))) << 32) | + (uint64_t)__rev16((uint32_t)t); +} + +static __inline__ unsigned long __attribute__((always_inline, nodebug)) + __rev16l(unsigned long t) { +#if __SIZEOF_LONG__ == 4 + return __rev16(t); +#else + return __rev16ll(t); +#endif +} + +/* REVSH */ +static __inline__ int16_t __attribute__((always_inline, nodebug)) + __revsh(int16_t t) { + return __builtin_bswap16(t); +} + +/* RBIT */ +static __inline__ uint32_t __attribute__((always_inline, nodebug)) + __rbit(uint32_t t) { + return __builtin_arm_rbit(t); +} + +static __inline__ uint64_t __attribute__((always_inline, nodebug)) + __rbitll(uint64_t t) { +#if __ARM_32BIT_STATE + return (((uint64_t) __builtin_arm_rbit(t)) << 32) | + __builtin_arm_rbit(t >> 32); +#else + return __builtin_arm_rbit64(t); +#endif +} + +static __inline__ unsigned long __attribute__((always_inline, nodebug)) + __rbitl(unsigned long t) { +#if __SIZEOF_LONG__ == 4 + return __rbit(t); +#else + return __rbitll(t); +#endif +} + /* * 9.4 Saturating intrinsics * diff
--git a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h new file mode 100644 index 0000000..bc4d4ac --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h @@ -0,0 +1,60 @@ +/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __AVX512BWINTRIN_H +#define __AVX512BWINTRIN_H + +typedef unsigned int __mmask32; +typedef unsigned long long __mmask64; +typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef short __v32hi __attribute__ ((__vector_size__ (64))); + + +/* Integer compare */ + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, + __u); +} + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h new file mode 100644 index 0000000..1a5ea15 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h @@ -0,0 +1,112 @@ +/*===---- avx512erintrin.h - AVX512ER intrinsics ------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512ERINTRIN_H +#define __AVX512ERINTRIN_H + + +// rsqrt28 +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_rsqrt28_round_pd (__m512d __A, int __R) +{ + return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A, + (__v8df)_mm512_setzero_pd(), + (__mmask8)-1, + __R); +} +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_rsqrt28_round_ps(__m512 __A, int __R) +{ + return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A, + (__v16sf)_mm512_setzero_ps(), + (__mmask16)-1, + __R); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) +{ + return (__m128) __builtin_ia32_rsqrt28ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, + __R); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) +{ + return (__m128d) __builtin_ia32_rsqrt28sd_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, + __R); +} + + +// rcp28 +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_rcp28_round_pd (__m512d __A, int __R) +{ + return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A, + (__v8df)_mm512_setzero_pd(), + (__mmask8)-1, + __R); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_rcp28_round_ps (__m512 __A, int __R) +{ + return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A, + (__v16sf)_mm512_setzero_ps (), + (__mmask16)-1, + __R); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) +{ + return (__m128) __builtin_ia32_rcp28ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, + __R); +} +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) +{ + return (__m128d) __builtin_ia32_rcp28sd_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, + __R); +} + +#endif // __AVX512ERINTRIN_H diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h new file mode 100644 index 0000000..9c80710 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h @@ -0,0 +1,1036 @@ +/*===---- avx512fintrin.h - AVX512F intrinsics ------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the
Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512FINTRIN_H +#define __AVX512FINTRIN_H + +typedef double __v8df __attribute__((__vector_size__(64))); +typedef float __v16sf __attribute__((__vector_size__(64))); +typedef long long __v8di __attribute__((__vector_size__(64))); +typedef int __v16si __attribute__((__vector_size__(64))); + +typedef float __m512 __attribute__((__vector_size__(64))); +typedef double __m512d __attribute__((__vector_size__(64))); +typedef long long __m512i __attribute__((__vector_size__(64))); + +typedef unsigned char __mmask8; +typedef unsigned short __mmask16; + +/* Rounding mode macros. */ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +/* Create vectors with repeated elements */ + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_setzero_si512(void) +{ + return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) +{ + return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) +{ +#ifdef __x86_64__ + return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, + (__v8di) + _mm512_setzero_si512 (), + __M); +#else + return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, + (__v8di) + _mm512_setzero_si512 (), + __M); +#endif +} + +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_setzero_ps(void) +{ + return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +} +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_setzero_pd(void) +{ + return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_set1_ps(float __w) +{ + return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_set1_pd(double __w) +{ + return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline __m512i __attribute__((__always_inline__, 
__nodebug__)) +_mm512_set1_epi32(int __s) +{ + return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, + __s, __s, __s, __s, __s, __s, __s, __s }; +} + +static __inline __m512i __attribute__((__always_inline__, __nodebug__)) +_mm512_set1_epi64(long long __d) +{ + return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_broadcastss_ps(__m128 __X) +{ + float __f = __X[0]; + return (__v16sf){ __f, __f, __f, __f, + __f, __f, __f, __f, + __f, __f, __f, __f, + __f, __f, __f, __f }; +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_broadcastsd_pd(__m128d __X) +{ + double __d = __X[0]; + return (__v8df){ __d, __d, __d, __d, + __d, __d, __d, __d }; +} + +/* Cast between vector types */ + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_castpd256_pd512(__m256d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_castps256_ps512(__m256 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, + -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline __m128d __attribute__((__always_inline__, __nodebug__)) +_mm512_castpd512_pd128(__m512d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1); +} + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm512_castps512_ps128(__m512 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); +} + +/* Arithmetic */ + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_add_pd(__m512d __a, __m512d __b) +{ + return __a + __b; +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_add_ps(__m512 __a, __m512 __b) +{ + return __a + __b; +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_mul_pd(__m512d __a, __m512d __b) +{ + return __a * __b; +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_mul_ps(__m512 __a, __m512 __b) +{ + return __a * __b; +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_sub_pd(__m512d __a, __m512d __b) +{ + return __a - __b; +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_sub_ps(__m512 __a, __m512 __b) +{ + return __a - __b; +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_max_pd(__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_max_ps(__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i +__attribute__ ((__always_inline__, __nodebug__)) +_mm512_max_epi32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_max_epu32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static 
__inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_max_epi64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_max_epu64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_min_pd(__m512d __A, __m512d __B) +{ + return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_min_ps(__m512 __A, __m512 __B) +{ + return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i +__attribute__ ((__always_inline__, __nodebug__)) +_mm512_min_epi32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_min_epu32(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_min_epi64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_min_epu64(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mul_epi32(__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mul_epu32(__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, + (__v16si) __Y, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_sqrt_pd(__m512d a) +{ + return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a, + (__v8df) _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_sqrt_ps(__m512 a) +{ + return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a, + (__v16sf) _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_rsqrt14_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1);} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_rsqrt14_ps(__m512 __A) +{ + return (__m512) 
__builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt14_ss(__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt14_sd(__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_rcp14_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_rcp14_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rcp14_ss(__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_rcp14_sd(__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); +} + +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_floor_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_FLOOR, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_floor_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_FLOOR, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_ceil_ps(__m512 __A) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, + _MM_FROUND_CEIL, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_ceil_pd(__m512d __A) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, + _MM_FROUND_CEIL, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i __attribute__ (( __always_inline__, __nodebug__)) +_mm512_abs_epi64(__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline __m512i __attribute__ (( __always_inline__, __nodebug__)) +_mm512_abs_epi32(__m512i __A) +{ + return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} + +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_roundscale_ps(__m512 __A, const int __imm) +{ + return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, + (__v16sf) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_roundscale_pd(__m512d __A, const int __imm) +{ + return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, + (__v8df) __A, -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d 
__attribute__((__always_inline__, __nodebug__)) +_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) + __builtin_ia32_vfmaddpd512_mask(__A, + __B, + __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) + __builtin_ia32_vfmsubpd512_mask(__A, + __B, + __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) +{ + return (__m512d) + __builtin_ia32_vfnmaddpd512_mask(__A, + __B, + __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) + __builtin_ia32_vfmaddps512_mask(__A, + __B, + __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) + __builtin_ia32_vfmsubps512_mask(__A, + __B, + __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) +{ + return (__m512) + __builtin_ia32_vfnmaddps512_mask(__A, + __B, + __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +/* Vector permutations */ + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I + /* idx */ , + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I + /* idx */ , + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) +{ + return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I + /* idx */ , + (__v8df) __A, + (__v8df) __B, + (__mmask8) -1); +} +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) +{ + return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I + /* idx */ , + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I) +{ + return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A, + (__v8di)__B, + __I, + (__v8di)_mm512_setzero_si512(), + (__mmask8) -1); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I) +{ + return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A, + (__v16si)__B, + __I, + (__v16si)_mm512_setzero_si512(), + (__mmask16) -1); +} + +/* Vector Blend */ + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) +{ + return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline __m512 __attribute__ ((__always_inline__, 
__nodebug__)) +_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) +{ + return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) +{ + return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +/* Compare */ + +static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a, + (__v16sf) b, p, (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +/* Conversion */ + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvttps_epu32(__m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) +_mm512_cvt_roundepi32_ps(__m512i __A, const int __R) +{ + return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + __R); +} + +static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) +_mm512_cvt_roundepu32_ps(__m512i __A, const int __R) +{ + return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + __R); +} + +static __inline __m512d __attribute__ (( __always_inline__, __nodebug__)) +_mm512_cvtepi32_pd(__m256i __A) +{ + return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +static __inline __m512d __attribute__ (( __always_inline__, __nodebug__)) +_mm512_cvtepu32_pd(__m256i __A) +{ + return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} +static __inline __m256 __attribute__ (( __always_inline__, __nodebug__)) +_mm512_cvt_roundpd_ps(__m512d __A, const int __R) +{ + return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1, + __R); +} + +static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvtps_ph(__m512 __A, const int __I) +{ + return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, + __I, + (__v16hi) + _mm256_setzero_si256 (), + -1); +} + +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvtph_ps(__m256i __A) +{ + return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m512i __attribute__((__always_inline__, __nodebug__)) +_mm512_cvttps_epi32(__m512 a) +{ + return (__m512i) + __builtin_ia32_cvttps2dq512_mask((__v16sf) a, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 
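/*
 * Note the pattern used throughout this file: each unmasked intrinsic
 * forwards to the masked builtin with an all-ones write-mask such as
 * (__mmask16)-1, a zeroed pass-through vector, and (where rounding applies)
 * _MM_FROUND_CUR_DIRECTION so the rounding mode is taken from MXCSR.
 * A zero-masking variant is only a small variation on the call above --
 * a sketch, with the maskz name assumed rather than defined in this file:
 *
 *   static __inline __m512i
 *   _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
 *   {
 *     return (__m512i) __builtin_ia32_cvttps2dq512_mask((__v16sf) __A,
 *                (__v16si) _mm512_setzero_si512 (),
 *                __U, _MM_FROUND_CUR_DIRECTION);
 *   }
 */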
+} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm512_cvttpd_epi32(__m512d a) +{ + return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a, + (__v8si)_mm256_setzero_si256(), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + __R); +} +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvtt_roundps_epi32(__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __R); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvt_roundps_epi32(__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __R); +} +static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvt_roundpd_epi32(__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + __R); +} +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvt_roundps_epu32(__m512 __A, const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __R); +} +static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_cvt_roundpd_epu32(__m512d __A, const int __R) +{ + return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1, + __R); +} + +/* Unpack and Interleave */ +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_unpackhi_pd(__m512d __a, __m512d __b) +{ + return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_unpacklo_pd(__m512d __a, __m512d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_unpackhi_ps(__m512 __a, __m512 __b) +{ + return __builtin_shufflevector(__a, __b, + 2, 18, 3, 19, + 2+4, 18+4, 3+4, 19+4, + 2+8, 18+8, 3+8, 19+8, + 2+12, 18+12, 3+12, 19+12); +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_unpacklo_ps(__m512 __a, __m512 __b) +{ + return __builtin_shufflevector(__a, __b, + 0, 16, 1, 17, + 0+4, 16+4, 1+4, 17+4, + 0+8, 16+8, 1+8, 17+8, + 0+12, 16+12, 1+12, 17+12); +} + +/* Bit Test */ + +static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_test_epi32_mask(__m512i __A, __m512i __B) +{ + return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_test_epi64_mask(__m512i __A, __m512i __B) +{ + return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +/* SIMD load ops */ + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, + (__v16si) + 
_mm512_setzero_si512 (), + (__mmask16) __U); +} + +static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_loadu_pd(double const *__p) +{ + struct __loadu_pd { + __m512d __v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_pd*)__p)->__v; +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_loadu_ps(float const *__p) +{ + struct __loadu_ps { + __m512 __v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_ps*)__p)->__v; +} + +/* SIMD store ops */ + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) +{ + __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, + (__mmask8) __U); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) +{ + __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, + (__mmask16) __U); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) +{ + __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_storeu_pd(void *__P, __m512d __A) +{ + __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, + (__mmask16) __U); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_storeu_ps(void *__P, __m512 __A) +{ + __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_store_ps(void *__P, __m512 __A) +{ + *(__m512*)__P = __A; +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_store_pd(void *__P, __m512d __A) +{ + *(__m512d*)__P = __A; +} + +/* Mask ops */ + +static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_knot(__mmask16 __M) +{ + return __builtin_ia32_knothi(__M); +} + +/* Integer compare */ + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return 
(__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, + (__mmask8)-1); +} + +#endif // __AVX512FINTRIN_H diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h new file mode 100644 index 0000000..11333f8 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h @@ -0,0 +1,83 @@ +/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ----------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 
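The #error above enforces the include discipline shared by every AVX-512 sub-header in this patch: user code includes <immintrin.h>, which pulls this file in only when both __AVX512VL__ and __AVX512BW__ are defined (see the immintrin.h hunk below). A minimal usage sketch -- the helper name and compile command are illustrative, not part of the header:

    /* cc -mavx512vl -mavx512bw equal_bytes.c */
    #include <immintrin.h>      /* never <avx512vlbwintrin.h> directly */

    /* Nonzero when all sixteen byte lanes of the two vectors match;
     * __mmask16 carries one compare-result bit per byte lane. */
    static int all_bytes_equal(__m128i a, __m128i b)
    {
        return _mm_cmpeq_epi8_mask(a, b) == (__mmask16)0xFFFF;
    }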
+#endif + +#ifndef __AVX512VLBWINTRIN_H +#define __AVX512VLBWINTRIN_H + +/* Integer compare */ + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b, + __u); +} + + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, + __u); +} + + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b, + __u); +} + +#endif /* __AVX512VLBWINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h new file mode 100644 index 0000000..8a374b1 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h @@ -0,0 +1,83 @@ +/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLINTRIN_H +#define __AVX512VLINTRIN_H + +/* Integer compare */ + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, + __u); +} + + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, + __u); +} + + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, + __u); +} + +#endif /* __AVX512VLINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h index 43c4a5e..0e5fd55 100644 --- a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h @@ -43,7 +43,7 @@ static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) __tzcnt_u16(unsigned short __X) { - return __builtin_ctzs(__X); + return __X ? __builtin_ctzs(__X) : 16; } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) @@ -87,7 +87,7 @@ __blsr_u32(unsigned int __X) static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __tzcnt_u32(unsigned int __X) { - return __builtin_ctz(__X); + return __X ? __builtin_ctz(__X) : 32; } #ifdef __x86_64__ @@ -140,7 +140,7 @@ __blsr_u64(unsigned long long __X) static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) __tzcnt_u64(unsigned long long __X) { - return __builtin_ctzll(__X); + return __X ? 
__builtin_ctzll(__X) : 64; } #endif /* __x86_64__ */ diff --git a/contrib/llvm/tools/clang/lib/Headers/cpuid.h b/contrib/llvm/tools/clang/lib/Headers/cpuid.h index f9254e9..5da02e0 100644 --- a/contrib/llvm/tools/clang/lib/Headers/cpuid.h +++ b/contrib/llvm/tools/clang/lib/Headers/cpuid.h @@ -25,6 +25,60 @@ #error this header is for x86 only #endif +/* Responses identification request with %eax 0 */ +/* AMD: "AuthenticAMD" */ +#define signature_AMD_ebx 0x68747541 +#define signature_AMD_edx 0x69746e65 +#define signature_AMD_ecx 0x444d4163 +/* CENTAUR: "CentaurHauls" */ +#define signature_CENTAUR_ebx 0x746e6543 +#define signature_CENTAUR_edx 0x48727561 +#define signature_CENTAUR_ecx 0x736c7561 +/* CYRIX: "CyrixInstead" */ +#define signature_CYRIX_ebx 0x69727943 +#define signature_CYRIX_edx 0x736e4978 +#define signature_CYRIX_ecx 0x64616574 +/* INTEL: "GenuineIntel" */ +#define signature_INTEL_ebx 0x756e6547 +#define signature_INTEL_edx 0x49656e69 +#define signature_INTEL_ecx 0x6c65746e +/* TM1: "TransmetaCPU" */ +#define signature_TM1_ebx 0x6e617254 +#define signature_TM1_edx 0x74656d73 +#define signature_TM1_ecx 0x55504361 +/* TM2: "GenuineTMx86" */ +#define signature_TM2_ebx 0x756e6547 +#define signature_TM2_edx 0x54656e69 +#define signature_TM2_ecx 0x3638784d +/* NSC: "Geode by NSC" */ +#define signature_NSC_ebx 0x646f6547 +#define signature_NSC_edx 0x43534e20 +#define signature_NSC_ecx 0x79622065 +/* NEXGEN: "NexGenDriven" */ +#define signature_NEXGEN_ebx 0x4778654e +#define signature_NEXGEN_edx 0x72446e65 +#define signature_NEXGEN_ecx 0x6e657669 +/* RISE: "RiseRiseRise" */ +#define signature_RISE_ebx 0x65736952 +#define signature_RISE_edx 0x65736952 +#define signature_RISE_ecx 0x65736952 +/* SIS: "SiS SiS SiS " */ +#define signature_SIS_ebx 0x20536953 +#define signature_SIS_edx 0x20536953 +#define signature_SIS_ecx 0x20536953 +/* UMC: "UMC UMC UMC " */ +#define signature_UMC_ebx 0x20434d55 +#define signature_UMC_edx 0x20434d55 +#define signature_UMC_ecx 0x20434d55 +/* VIA: "VIA VIA VIA " */ +#define signature_VIA_ebx 0x20414956 +#define signature_VIA_edx 0x20414956 +#define signature_VIA_ecx 0x20414956 +/* VORTEX: "Vortex86 SoC" */ +#define signature_VORTEX_ebx 0x74726f56 +#define signature_VORTEX_edx 0x36387865 +#define signature_VORTEX_ecx 0x436f5320 + /* Features in %ecx for level 1 */ #define bit_SSE3 0x00000001 #define bit_PCLMULQDQ 0x00000002 @@ -53,7 +107,7 @@ #define bit_XSAVE 0x04000000 #define bit_OSXSAVE 0x08000000 #define bit_AVX 0x10000000 -#define bit_RDRAND 0x40000000 +#define bit_RDRND 0x40000000 /* Features in %edx for level 1 */ #define bit_FPU 0x00000001 @@ -92,31 +146,29 @@ #define bit_SMEP 0x00000080 #define bit_ENH_MOVSB 0x00000200 -/* PIC on i386 uses %ebx, so preserve it. */ #if __i386__ #define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ - __asm(" pushl %%ebx\n" \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#else +/* x86-64 uses %rbx as the base register, so preserve it. 
*/ +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm(" xchgq %%rbx,%q1\n" \ " cpuid\n" \ - " mov %%ebx,%1\n" \ - " popl %%ebx" \ + " xchgq %%rbx,%q1" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__level)) #define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ - __asm(" pushl %%ebx\n" \ + __asm(" xchgq %%rbx,%q1\n" \ " cpuid\n" \ - " mov %%ebx,%1\n" \ - " popl %%ebx" \ + " xchgq %%rbx,%q1" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__level), "2"(__count)) -#else -#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ - __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ - : "0"(__level)) - -#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ - __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ - : "0"(__level), "2"(__count)) #endif static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h index b3f8569..28d0043 100644 --- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h @@ -155,148 +155,148 @@ _mm_xor_pd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 0); + return (__m128d)__builtin_ia32_cmpeqpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 1); + return (__m128d)__builtin_ia32_cmpltpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmple_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 2); + return (__m128d)__builtin_ia32_cmplepd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 1); + return (__m128d)__builtin_ia32_cmpltpd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 2); + return (__m128d)__builtin_ia32_cmplepd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 7); + return (__m128d)__builtin_ia32_cmpordpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 3); + return (__m128d)__builtin_ia32_cmpunordpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 4); + return (__m128d)__builtin_ia32_cmpneqpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 5); + return (__m128d)__builtin_ia32_cmpnltpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 6); + return (__m128d)__builtin_ia32_cmpnlepd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, 
__nodebug__)) _mm_cmpngt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 5); + return (__m128d)__builtin_ia32_cmpnltpd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 6); + return (__m128d)__builtin_ia32_cmpnlepd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0); + return (__m128d)__builtin_ia32_cmpeqsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1); + return (__m128d)__builtin_ia32_cmpltsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmple_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2); + return (__m128d)__builtin_ia32_cmplesd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); + __m128d __c = __builtin_ia32_cmpltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); + __m128d __c = __builtin_ia32_cmplesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7); + return (__m128d)__builtin_ia32_cmpordsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3); + return (__m128d)__builtin_ia32_cmpunordsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4); + return (__m128d)__builtin_ia32_cmpneqsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5); + return (__m128d)__builtin_ia32_cmpnltsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6); + return (__m128d)__builtin_ia32_cmpnlesd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); + __m128d __c = __builtin_ia32_cmpnltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); + __m128d __c = __builtin_ia32_cmpnlesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } diff --git a/contrib/llvm/tools/clang/lib/Headers/float.h b/contrib/llvm/tools/clang/lib/Headers/float.h index 02ef6bf..238cf76 100644 --- a/contrib/llvm/tools/clang/lib/Headers/float.h +++ b/contrib/llvm/tools/clang/lib/Headers/float.h @@ -28,7 +28,7 @@ * additional definitions provided for Windows. 
* For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx */ -#if (defined(__MINGW32__) || defined(_MSC_VER)) && \ +#if (defined(__MINGW32__) || defined(_MSC_VER)) && __STDC_HOSTED__ && \ __has_include_next(<float.h>) # include_next <float.h> diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h index df4bea8..2400fea 100644 --- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h @@ -76,6 +76,26 @@ #include <fmaintrin.h> #endif +#ifdef __AVX512F__ +#include <avx512fintrin.h> +#endif + +#ifdef __AVX512VL__ +#include <avx512vlintrin.h> +#endif + +#ifdef __AVX512BW__ +#include <avx512bwintrin.h> +#endif + +#if defined (__AVX512VL__) && defined (__AVX512BW__) +#include <avx512vlbwintrin.h> +#endif + +#ifdef __AVX512ER__ +#include <avx512erintrin.h> +#endif + #ifdef __RDRND__ static __inline__ int __attribute__((__always_inline__, __nodebug__)) _rdrand16_step(unsigned short *__p) @@ -98,6 +118,58 @@ _rdrand64_step(unsigned long long *__p) #endif #endif /* __RDRND__ */ +#ifdef __FSGSBASE__ +#ifdef __x86_64__ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_readfsbase_u32(void) +{ + return __builtin_ia32_rdfsbase32(); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_readfsbase_u64(void) +{ + return __builtin_ia32_rdfsbase64(); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_readgsbase_u32(void) +{ + return __builtin_ia32_rdgsbase32(); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_readgsbase_u64(void) +{ + return __builtin_ia32_rdgsbase64(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writefsbase_u32(unsigned int __V) +{ + return __builtin_ia32_wrfsbase32(__V); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writefsbase_u64(unsigned long long __V) +{ + return __builtin_ia32_wrfsbase64(__V); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writegsbase_u32(unsigned int __V) +{ + return __builtin_ia32_wrgsbase32(__V); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_writegsbase_u64(unsigned long long __V) +{ + return __builtin_ia32_wrgsbase64(__V); +} +#endif +#endif /* __FSGSBASE__ */ + #ifdef __RTM__ #include <rtmintrin.h> #endif @@ -115,4 +187,8 @@ _xtest(void) #include <shaintrin.h> #endif +/* Some intrinsics inside adxintrin.h are available only if __ADX__ defined, + * whereas others are also available if __ADX__ undefined */ +#include <adxintrin.h> + #endif /* __IMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h b/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h index 62ab5ca..35d6659 100644 --- a/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h @@ -35,20 +35,32 @@ static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) __lzcnt16(unsigned short __X) { - return __builtin_clzs(__X); + return __X ? __builtin_clzs(__X) : 16; } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __lzcnt32(unsigned int __X) { - return __builtin_clz(__X); + return __X ? __builtin_clz(__X) : 32; +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_lzcnt_u32(unsigned int __X) +{ + return __X ? 
__builtin_clz(__X) : 32; } #ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) __lzcnt64(unsigned long long __X) { - return __builtin_clzll(__X); + return __X ? __builtin_clzll(__X) : 64; +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_lzcnt_u64(unsigned long long __X) +{ + return __X ? __builtin_clzll(__X) : 64; } #endif diff --git a/contrib/llvm/tools/clang/lib/Headers/module.modulemap b/contrib/llvm/tools/clang/lib/Headers/module.modulemap index 9f7944d..062464e 100644 --- a/contrib/llvm/tools/clang/lib/Headers/module.modulemap +++ b/contrib/llvm/tools/clang/lib/Headers/module.modulemap @@ -1,4 +1,4 @@ -module _Builtin_intrinsics [system] { +module _Builtin_intrinsics [system] [extern_c] { explicit module altivec { requires altivec header "altivec.h" @@ -7,6 +7,11 @@ module _Builtin_intrinsics [system] { explicit module arm { requires arm + explicit module acle { + header "arm_acle.h" + export * + } + explicit module neon { requires neon header "arm_neon.h" @@ -96,6 +101,17 @@ module _Builtin_intrinsics [system] { header "avx2intrin.h" } + explicit module avx512f { + requires avx512f + export avx2 + header "avx512fintrin.h" + } + + explicit module avx512er { + requires avx512er + header "avx512erintrin.h" + } + explicit module bmi { requires bmi header "bmiintrin.h" @@ -154,3 +170,7 @@ module _Builtin_intrinsics [system] { } } } + +module _Builtin_stddef_max_align_t [system] [extern_c] { + header "__stddef_max_align_t.h" +} diff --git a/contrib/llvm/tools/clang/lib/Headers/shaintrin.h b/contrib/llvm/tools/clang/lib/Headers/shaintrin.h index 66ed055..391a4bb 100644 --- a/contrib/llvm/tools/clang/lib/Headers/shaintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/shaintrin.h @@ -38,37 +38,37 @@ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha1nexte(__X, __Y); + return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha1msg1_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha1msg1(__X, __Y); + return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha1msg2_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha1msg2(__X, __Y); + return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) { - return __builtin_ia32_sha256rnds2(__X, __Y, __Z); + return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha256msg1_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha256msg1(__X, __Y); + return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha256msg2_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha256msg2(__X, __Y); + return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y); } #endif /* __SHAINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/stdatomic.h b/contrib/llvm/tools/clang/lib/Headers/stdatomic.h new file mode 100644 index 0000000..e3c3476 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/stdatomic.h @@ -0,0 
+1,190 @@ +/*===---- stdatomic.h - Standard header for atomic types and operations -----=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_STDATOMIC_H +#define __CLANG_STDATOMIC_H + +/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for + * example, already has a Clang-compatible stdatomic.h header. + */ +#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>) +# include_next <stdatomic.h> +#else + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* 7.17.1 Introduction */ + +#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +#define ATOMIC_SHORT_T_LOCK_FREE __GCC_ATOMIC_SHORT_T_LOCK_FREE +#define ATOMIC_INT_T_LOCK_FREE __GCC_ATOMIC_INT_T_LOCK_FREE +#define ATOMIC_LONG_T_LOCK_FREE __GCC_ATOMIC_LONG_T_LOCK_FREE +#define ATOMIC_LLONG_T_LOCK_FREE __GCC_ATOMIC_LLONG_T_LOCK_FREE +#define ATOMIC_POINTER_T_LOCK_FREE __GCC_ATOMIC_POINTER_T_LOCK_FREE + +/* 7.17.2 Initialization */ + +#define ATOMIC_VAR_INIT(value) (value) +#define atomic_init __c11_atomic_init + +/* 7.17.3 Order and consistency */ + +typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_consume = __ATOMIC_CONSUME, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +#define kill_dependency(y) (y) + +/* 7.17.4 Fences */ + +// These should be provided by the libc implementation. 
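The declarations that follow pair with the macros defined just after them: an ordinary call such as atomic_thread_fence(order) expands to the __c11 builtin, while the out-of-line functions remain available to code that needs a real symbol, because a function-like macro only expands when its name is immediately followed by a parenthesis. A short sketch of both paths through this header (the function name is illustrative):

    #include <stdatomic.h>

    void fence_both_ways(void)
    {
        /* Macro path: expands to __c11_atomic_thread_fence(...). */
        atomic_thread_fence(memory_order_release);

        /* Parenthesizing the name suppresses macro expansion, so this
         * call binds to the function a libc is expected to provide. */
        (atomic_thread_fence)(memory_order_release);
    }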
+void atomic_thread_fence(memory_order); +void atomic_signal_fence(memory_order); + +#define atomic_thread_fence(order) __c11_atomic_thread_fence(order) +#define atomic_signal_fence(order) __c11_atomic_signal_fence(order) + +/* 7.17.5 Lock-free property */ + +#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj))) + +/* 7.17.6 Atomic integer types */ + +#ifdef __cplusplus +typedef _Atomic(bool) atomic_bool; +#else +typedef _Atomic(_Bool) atomic_bool; +#endif +typedef _Atomic(char) atomic_char; +typedef _Atomic(signed char) atomic_schar; +typedef _Atomic(unsigned char) atomic_uchar; +typedef _Atomic(short) atomic_short; +typedef _Atomic(unsigned short) atomic_ushort; +typedef _Atomic(int) atomic_int; +typedef _Atomic(unsigned int) atomic_uint; +typedef _Atomic(long) atomic_long; +typedef _Atomic(unsigned long) atomic_ulong; +typedef _Atomic(long long) atomic_llong; +typedef _Atomic(unsigned long long) atomic_ullong; +typedef _Atomic(uint_least16_t) atomic_char16_t; +typedef _Atomic(uint_least32_t) atomic_char32_t; +typedef _Atomic(wchar_t) atomic_wchar_t; +typedef _Atomic(int_least8_t) atomic_int_least8_t; +typedef _Atomic(uint_least8_t) atomic_uint_least8_t; +typedef _Atomic(int_least16_t) atomic_int_least16_t; +typedef _Atomic(uint_least16_t) atomic_uint_least16_t; +typedef _Atomic(int_least32_t) atomic_int_least32_t; +typedef _Atomic(uint_least32_t) atomic_uint_least32_t; +typedef _Atomic(int_least64_t) atomic_int_least64_t; +typedef _Atomic(uint_least64_t) atomic_uint_least64_t; +typedef _Atomic(int_fast8_t) atomic_int_fast8_t; +typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; +typedef _Atomic(int_fast16_t) atomic_int_fast16_t; +typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; +typedef _Atomic(int_fast32_t) atomic_int_fast32_t; +typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; +typedef _Atomic(int_fast64_t) atomic_int_fast64_t; +typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; +typedef _Atomic(intptr_t) atomic_intptr_t; +typedef _Atomic(uintptr_t) atomic_uintptr_t; +typedef _Atomic(size_t) atomic_size_t; +typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; +typedef _Atomic(intmax_t) atomic_intmax_t; +typedef _Atomic(uintmax_t) atomic_uintmax_t; + +/* 7.17.7 Operations on atomic types */ + +#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST) +#define atomic_store_explicit __c11_atomic_store + +#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST) +#define atomic_load_explicit __c11_atomic_load + +#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST) +#define atomic_exchange_explicit __c11_atomic_exchange + +#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong + +#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak + +#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_add_explicit __c11_atomic_fetch_add + +#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub + +#define atomic_fetch_or(object, operand) 
__c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_or_explicit __c11_atomic_fetch_or + +#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor + +#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_and_explicit __c11_atomic_fetch_and + +/* 7.17.8 Atomic flag type and operations */ + +typedef struct atomic_flag { atomic_bool _Value; } atomic_flag; + +#define ATOMIC_FLAG_INIT { 0 } + +// These should be provided by the libc implementation. +#ifdef __cplusplus +bool atomic_flag_test_and_set(volatile atomic_flag *); +bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); +#else +_Bool atomic_flag_test_and_set(volatile atomic_flag *); +_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); +#endif +void atomic_flag_clear(volatile atomic_flag *); +void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order); + +#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST) +#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order) + +#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST) +#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order) + +#ifdef __cplusplus +} +#endif + +#endif /* __STDC_HOSTED__ */ +#endif /* __CLANG_STDATOMIC_H */ + diff --git a/contrib/llvm/tools/clang/lib/Headers/stddef.h b/contrib/llvm/tools/clang/lib/Headers/stddef.h index 2dfe0a2..7354996 100644 --- a/contrib/llvm/tools/clang/lib/Headers/stddef.h +++ b/contrib/llvm/tools/clang/lib/Headers/stddef.h @@ -30,11 +30,15 @@ #if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ !defined(__need_wchar_t) && !defined(__need_NULL) && \ !defined(__need_wint_t) +/* Always define miscellaneous pieces when modules are available. */ +#if !__has_feature(modules) #define __STDDEF_H +#endif #define __need_ptrdiff_t #define __need_size_t #define __need_wchar_t #define __need_NULL +#define __need_STDDEF_H_misc /* __need_wint_t is intentionally not defined here. */ #endif @@ -60,7 +64,7 @@ typedef __SIZE_TYPE__ size_t; #undef __need_size_t #endif /*defined(__need_size_t) */ -#if defined(__STDDEF_H) +#if defined(__need_STDDEF_H_misc) /* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is * enabled. 
*/ #if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ @@ -71,7 +75,7 @@ typedef __SIZE_TYPE__ size_t; #endif typedef __SIZE_TYPE__ rsize_t; #endif -#endif /* defined(__STDDEF_H) */ +#endif /* defined(__need_STDDEF_H_misc) */ #if defined(__need_wchar_t) #ifndef __cplusplus @@ -109,26 +113,13 @@ using ::std::nullptr_t; #undef __need_NULL #endif /* defined(__need_NULL) */ -#if defined(__STDDEF_H) - +#if defined(__need_STDDEF_H_misc) #if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L -#if !defined(__CLANG_MAX_ALIGN_T_DEFINED) || __has_feature(modules) -#ifndef _MSC_VER -typedef struct { - long long __clang_max_align_nonce1 - __attribute__((__aligned__(__alignof__(long long)))); - long double __clang_max_align_nonce2 - __attribute__((__aligned__(__alignof__(long double)))); -} max_align_t; -#else -typedef double max_align_t; +#include "__stddef_max_align_t.h" #endif -#define __CLANG_MAX_ALIGN_T_DEFINED -#endif -#endif - #define offsetof(t, d) __builtin_offsetof(t, d) -#endif /* __STDDEF_H */ +#undef __need_STDDEF_H_misc +#endif /* defined(__need_STDDEF_H_misc) */ /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ diff --git a/contrib/llvm/tools/clang/lib/Headers/unwind.h b/contrib/llvm/tools/clang/lib/Headers/unwind.h index 685c1df..90aca16 100644 --- a/contrib/llvm/tools/clang/lib/Headers/unwind.h +++ b/contrib/llvm/tools/clang/lib/Headers/unwind.h @@ -26,8 +26,8 @@ #ifndef __CLANG_UNWIND_H #define __CLANG_UNWIND_H -#if __has_include_next(<unwind.h>) -/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available, +#if defined(__APPLE__) && __has_include_next(<unwind.h>) +/* Darwin (from 11.x on) provide an unwind.h. If that's available, * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * so define that around the include.*/ # ifndef _GNU_SOURCE @@ -199,6 +199,8 @@ _Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); _Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); +_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *); + void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); diff --git a/contrib/llvm/tools/clang/lib/Headers/vadefs.h b/contrib/llvm/tools/clang/lib/Headers/vadefs.h new file mode 100644 index 0000000..7fe9a74 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/vadefs.h @@ -0,0 +1,65 @@ +/* ===-------- vadefs.h ---------------------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +/* Only include this if we are aiming for MSVC compatibility. */ +#ifndef _MSC_VER +#include_next <vadefs.h> +#else + +#ifndef __clang_vadefs_h +#define __clang_vadefs_h + +#include_next <vadefs.h> + +/* Override macros from vadefs.h with definitions that work with Clang. */ +#ifdef _crt_va_start +#undef _crt_va_start +#define _crt_va_start(ap, param) __builtin_va_start(ap, param) +#endif +#ifdef _crt_va_end +#undef _crt_va_end +#define _crt_va_end(ap) __builtin_va_end(ap) +#endif +#ifdef _crt_va_arg +#undef _crt_va_arg +#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type) +#endif + +/* VS 2015 switched to double underscore names, which is an improvement, but now + * we have to intercept those names too. + */ +#ifdef __crt_va_start +#undef __crt_va_start +#define __crt_va_start(ap, param) __builtin_va_start(ap, param) +#endif +#ifdef __crt_va_end +#undef __crt_va_end +#define __crt_va_end(ap) __builtin_va_end(ap) +#endif +#ifdef __crt_va_arg +#undef __crt_va_arg +#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type) +#endif + +#endif +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h index c9befcb..d1afe81 100644 --- a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h @@ -182,153 +182,153 @@ _mm_xor_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 0); + return (__m128)__builtin_ia32_cmpeqss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 0); + return (__m128)__builtin_ia32_cmpeqps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 1); + return (__m128)__builtin_ia32_cmpltss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 1); + return (__m128)__builtin_ia32_cmpltps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmple_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 2); + return (__m128)__builtin_ia32_cmpless(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmple_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 2); + return (__m128)__builtin_ia32_cmpleps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 1), + __builtin_ia32_cmpltss(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 1); + return (__m128)__builtin_ia32_cmpltps(__b, __a); } static __inline__ __m128 
__attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 2), + __builtin_ia32_cmpless(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 2); + return (__m128)__builtin_ia32_cmpleps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 4); + return (__m128)__builtin_ia32_cmpneqss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 4); + return (__m128)__builtin_ia32_cmpneqps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 5); + return (__m128)__builtin_ia32_cmpnltss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 5); + return (__m128)__builtin_ia32_cmpnltps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 6); + return (__m128)__builtin_ia32_cmpnless(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 6); + return (__m128)__builtin_ia32_cmpnleps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 5), + __builtin_ia32_cmpnltss(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 5); + return (__m128)__builtin_ia32_cmpnltps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 6), + __builtin_ia32_cmpnless(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 6); + return (__m128)__builtin_ia32_cmpnleps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 7); + return (__m128)__builtin_ia32_cmpordss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 7); + return (__m128)__builtin_ia32_cmpordps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 3); + return (__m128)__builtin_ia32_cmpunordss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 3); + return 
(__m128)__builtin_ia32_cmpunordps(__a, __b); } static __inline__ int __attribute__((__always_inline__, __nodebug__))
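Throughout the emmintrin.h and xmmintrin.h hunks above, predicates with no builtin of their own (_mm_cmpgt_ss, _mm_cmpge_ss, _mm_cmpngt_ss, _mm_cmpnge_ss) are built by swapping the operands of the lt/le/nlt/nle builtins; for the scalar forms, __builtin_shufflevector(__a, __c, 4, 1, 2, 3) then keeps lanes 1-3 of the first argument so only lane 0 carries the compare result. A self-contained check of that guarantee -- the file name and compile command are illustrative:

    /* cc -msse cmpgt_ss_check.c */
    #include <stdio.h>
    #include <xmmintrin.h>

    int main(void)
    {
        __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  /* lane 0 holds 1.0f */
        __m128 b = _mm_set_ps(9.0f, 9.0f, 9.0f, 0.5f);  /* lane 0 holds 0.5f */
        float r[4];

        _mm_storeu_ps(r, _mm_cmpgt_ss(a, b));
        /* r[0] is the all-ones pattern for 1.0f > 0.5f; lanes 1-3 must
         * still be a's upper lanes: 2, 3 and 4. */
        printf("upper lanes: %g %g %g\n", r[1], r[2], r[3]);
        return 0;
    }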