Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers')
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/Intrin.h            |  154
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/altivec.h           | 1319
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h  | 1045
3 files changed, 2323 insertions, 195 deletions
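The Intrin.h hunks below make three related fixes to the MSVC-compatibility intrinsics: the bare 0 (i.e. __ATOMIC_RELAXED) memory-order argument becomes an explicit __ATOMIC_SEQ_CST; the *_fetch builtins, which return the post-operation value, give way to the fetch_* forms so that _InterlockedExchangeAdd and friends return the value held before the operation, as the MSVC intrinsics do; and the hand-written "lock bts" inline assembly behind _interlockedbittestandset is replaced by a portable __atomic_fetch_or, which is also why the #if defined(__i386__) || defined(__x86_64__) guard around it can go away. A minimal standalone sketch of the resulting pattern, assuming a GCC/Clang-style compiler with the __atomic builtins (the demo_* names are illustrative, not part of the header):

#include <stdio.h>

/* Returns the value *addend held before the addition, matching MSVC's
   _InterlockedExchangeAdd; the old header computed this as
   __atomic_add_fetch(addend, value, 0) - value. */
static long demo_exchange_add(volatile long *addend, long value) {
  return __atomic_fetch_add(addend, value, __ATOMIC_SEQ_CST);
}

/* The pattern that retires the "lock bts" asm: atomically OR the bit in,
   then report whether it was already set in the previous value. */
static unsigned char demo_bittestandset(volatile long *base, long pos) {
  long prev = __atomic_fetch_or(base, 1l << pos, __ATOMIC_SEQ_CST);
  return (prev >> pos) & 1;
}

int main(void) {
  volatile long counter = 40;
  printf("returned: %ld\n", demo_exchange_add(&counter, 2)); /* 40 (old value) */
  printf("counter:  %ld\n", counter);                        /* 42 */

  volatile long bits = 0;
  printf("was set: %d\n", demo_bittestandset(&bits, 3));     /* 0 */
  printf("was set: %d\n", demo_bittestandset(&bits, 3));     /* 1 */
  return 0;
}
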
diff --git a/contrib/llvm/tools/clang/lib/Headers/Intrin.h b/contrib/llvm/tools/clang/lib/Headers/Intrin.h index 7ba311e..24b3eae 100644 --- a/contrib/llvm/tools/clang/lib/Headers/Intrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/Intrin.h @@ -513,47 +513,40 @@ _BitScanReverse(unsigned long *_Index, unsigned long _Mask) { return 1; } static __inline__ unsigned short __DEFAULT_FN_ATTRS -__popcnt16(unsigned short value) { - return __builtin_popcount((int)value); +__popcnt16(unsigned short _Value) { + return __builtin_popcount((int)_Value); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__popcnt(unsigned int value) { - return __builtin_popcount(value); +__popcnt(unsigned int _Value) { + return __builtin_popcount(_Value); } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittest(long const *a, long b) { - return (*a >> b) & 1; +_bittest(long const *_BitBase, long _BitPos) { + return (*_BitBase >> _BitPos) & 1; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandcomplement(long *a, long b) { - unsigned char x = (*a >> b) & 1; - *a = *a ^ (1 << b); - return x; +_bittestandcomplement(long *_BitBase, long _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase ^ (1 << _BitPos); + return _Res; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandreset(long *a, long b) { - unsigned char x = (*a >> b) & 1; - *a = *a & ~(1 << b); - return x; +_bittestandreset(long *_BitBase, long _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase & ~(1 << _BitPos); + return _Res; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandset(long *a, long b) { - unsigned char x = (*a >> b) & 1; - *a = *a | (1 << b); - return x; +_bittestandset(long *_BitBase, long _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase | (1 << _BitPos); + return _Res; } -#if defined(__i386__) || defined(__x86_64__) static __inline__ unsigned char __DEFAULT_FN_ATTRS -_interlockedbittestandset(long volatile *__BitBase, long __BitPos) { - unsigned char __Res; - __asm__ ("xor %0, %0\n" - "lock bts %2, %1\n" - "setc %0\n" - : "=r" (__Res), "+m"(*__BitBase) - : "Ir"(__BitPos)); - return __Res; +_interlockedbittestandset(long volatile *_BitBase, long _BitPos) { + long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_SEQ_CST); + return (_PrevVal >> _BitPos) & 1; } -#endif #ifdef __x86_64__ static __inline__ unsigned char __DEFAULT_FN_ATTRS _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { @@ -571,40 +564,36 @@ _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { } static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS - __popcnt64(unsigned __int64 value) { - return __builtin_popcountll(value); +__popcnt64(unsigned __int64 _Value) { + return __builtin_popcountll(_Value); } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittest64(__int64 const *a, __int64 b) { - return (*a >> b) & 1; +_bittest64(__int64 const *_BitBase, __int64 _BitPos) { + return (*_BitBase >> _BitPos) & 1; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandcomplement64(__int64 *a, __int64 b) { - unsigned char x = (*a >> b) & 1; - *a = *a ^ (1ll << b); - return x; +_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase ^ (1ll << _BitPos); + return _Res; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandreset64(__int64 *a, __int64 b) { - unsigned char x = (*a >> b) & 1; - *a = *a & 
~(1ll << b); - return x; +_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase & ~(1ll << _BitPos); + return _Res; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_bittestandset64(__int64 *a, __int64 b) { - unsigned char x = (*a >> b) & 1; - *a = *a | (1ll << b); - return x; +_bittestandset64(__int64 *_BitBase, __int64 _BitPos) { + unsigned char _Res = (*_BitBase >> _BitPos) & 1; + *_BitBase = *_BitBase | (1ll << _BitPos); + return _Res; } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) { - unsigned char __Res; - __asm__ ("xor %0, %0\n" - "lock bts %2, %1\n" - "setc %0\n" - : "=r" (__Res), "+m"(*__BitBase) - : "Ir"(__BitPos)); - return __Res; +_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) { + long long _PrevVal = + __atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST); + return (_PrevVal >> _BitPos) & 1; } #endif /*----------------------------------------------------------------------------*\ @@ -612,16 +601,16 @@ _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) { \*----------------------------------------------------------------------------*/ static __inline__ char __DEFAULT_FN_ATTRS _InterlockedExchangeAdd8(char volatile *_Addend, char _Value) { - return __atomic_add_fetch(_Addend, _Value, 0) - _Value; + return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST); } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { - return __atomic_add_fetch(_Addend, _Value, 0) - _Value; + return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { - return __atomic_add_fetch(_Addend, _Value, 0) - _Value; + return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -629,20 +618,20 @@ _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { \*----------------------------------------------------------------------------*/ static __inline__ char __DEFAULT_FN_ATTRS _InterlockedExchangeSub8(char volatile *_Subend, char _Value) { - return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedExchangeSub16(short volatile *_Subend, short _Value) { - return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchangeSub(long volatile *_Subend, long _Value) { - return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { - return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; + return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -650,12 +639,12 @@ _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { \*----------------------------------------------------------------------------*/ static __inline__ short __DEFAULT_FN_ATTRS _InterlockedIncrement16(short 
volatile *_Value) { - return __atomic_add_fetch(_Value, 1, 0); + return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedIncrement64(__int64 volatile *_Value) { - return __atomic_add_fetch(_Value, 1, 0); + return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -663,12 +652,12 @@ _InterlockedIncrement64(__int64 volatile *_Value) { \*----------------------------------------------------------------------------*/ static __inline__ short __DEFAULT_FN_ATTRS _InterlockedDecrement16(short volatile *_Value) { - return __atomic_sub_fetch(_Value, 1, 0); + return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedDecrement64(__int64 volatile *_Value) { - return __atomic_sub_fetch(_Value, 1, 0); + return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -676,20 +665,20 @@ _InterlockedDecrement64(__int64 volatile *_Value) { \*----------------------------------------------------------------------------*/ static __inline__ char __DEFAULT_FN_ATTRS _InterlockedAnd8(char volatile *_Value, char _Mask) { - return __atomic_and_fetch(_Value, _Mask, 0); + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedAnd16(short volatile *_Value, short _Mask) { - return __atomic_and_fetch(_Value, _Mask, 0); + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedAnd(long volatile *_Value, long _Mask) { - return __atomic_and_fetch(_Value, _Mask, 0); + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { - return __atomic_and_fetch(_Value, _Mask, 0); + return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -697,20 +686,20 @@ _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { \*----------------------------------------------------------------------------*/ static __inline__ char __DEFAULT_FN_ATTRS _InterlockedOr8(char volatile *_Value, char _Mask) { - return __atomic_or_fetch(_Value, _Mask, 0); + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedOr16(short volatile *_Value, short _Mask) { - return __atomic_or_fetch(_Value, _Mask, 0); + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedOr(long volatile *_Value, long _Mask) { - return __atomic_or_fetch(_Value, _Mask, 0); + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { - return __atomic_or_fetch(_Value, _Mask, 0); + return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -718,20 +707,20 @@ _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { \*----------------------------------------------------------------------------*/ static __inline__ char __DEFAULT_FN_ATTRS _InterlockedXor8(char volatile *_Value, char _Mask) { - return 
__atomic_xor_fetch(_Value, _Mask, 0); + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedXor16(short volatile *_Value, short _Mask) { - return __atomic_xor_fetch(_Value, _Mask, 0); + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedXor(long volatile *_Value, long _Mask) { - return __atomic_xor_fetch(_Value, _Mask, 0); + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { - return __atomic_xor_fetch(_Value, _Mask, 0); + return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST); } #endif /*----------------------------------------------------------------------------*\ @@ -739,18 +728,18 @@ _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { \*----------------------------------------------------------------------------*/ static __inline__ char __DEFAULT_FN_ATTRS _InterlockedExchange8(char volatile *_Target, char _Value) { - __atomic_exchange(_Target, &_Value, &_Value, 0); + __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST); return _Value; } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedExchange16(short volatile *_Target, short _Value) { - __atomic_exchange(_Target, &_Value, &_Value, 0); + __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST); return _Value; } #ifdef __x86_64__ static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { - __atomic_exchange(_Target, &_Value, &_Value, 0); + __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST); return _Value; } #endif @@ -760,19 +749,22 @@ _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { static __inline__ char __DEFAULT_FN_ATTRS _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, char _Comparand) { - __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return _Comparand; } static __inline__ short __DEFAULT_FN_ATTRS _InterlockedCompareExchange16(short volatile *_Destination, short _Exchange, short _Comparand) { - __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return _Comparand; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { - __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return _Comparand; } /*----------------------------------------------------------------------------*\ diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h index 2c80e24..f52bcbc 100644 --- a/contrib/llvm/tools/clang/lib/Headers/altivec.h +++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h @@ -48,7 +48,8 @@ static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c); -static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b, +static vector short __ATTRS_o_ai vec_perm(vector signed short __a, + vector signed short __b, vector unsigned char __c); 
static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a, @@ -62,7 +63,8 @@ static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a, static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c); -static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b, +static vector int __ATTRS_o_ai vec_perm(vector signed int __a, + vector signed int __b, vector unsigned char __c); static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a, @@ -77,14 +79,18 @@ static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b, vector unsigned char __c); #ifdef __VSX__ -static vector long long __ATTRS_o_ai vec_perm(vector long long __a, - vector long long __b, +static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a, + vector signed long long __b, vector unsigned char __c); static vector unsigned long long __ATTRS_o_ai vec_perm(vector unsigned long long __a, vector unsigned long long __b, vector unsigned char __c); +static vector bool long long __ATTRS_o_ai +vec_perm(vector bool long long __a, vector bool long long __b, + vector unsigned char __c); + static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b, vector unsigned char __c); #endif @@ -1735,6 +1741,48 @@ static vector bool long long __ATTRS_o_ai vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { return vec_cmpgt(__b, __a); } + +/* vec_cntlz */ + +static vector signed char __ATTRS_o_ai vec_cntlz(vector signed char __a) { + return __builtin_altivec_vclzb(__a); +} +static vector unsigned char __ATTRS_o_ai vec_cntlz(vector unsigned char __a) { + return __builtin_altivec_vclzb(__a); +} +static vector signed short __ATTRS_o_ai vec_cntlz(vector signed short __a) { + return __builtin_altivec_vclzh(__a); +} +static vector unsigned short __ATTRS_o_ai vec_cntlz(vector unsigned short __a) { + return __builtin_altivec_vclzh(__a); +} +static vector signed int __ATTRS_o_ai vec_cntlz(vector signed int __a) { + return __builtin_altivec_vclzw(__a); +} +static vector unsigned int __ATTRS_o_ai vec_cntlz(vector unsigned int __a) { + return __builtin_altivec_vclzw(__a); +} +static vector signed long long __ATTRS_o_ai +vec_cntlz(vector signed long long __a) { + return __builtin_altivec_vclzd(__a); +} +static vector unsigned long long __ATTRS_o_ai +vec_cntlz(vector unsigned long long __a) { + return __builtin_altivec_vclzd(__a); +} +#endif + +/* vec_cpsgn */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai vec_cpsgn(vector float __a, vector float __b) { + return __builtin_vsx_xvcpsgnsp(__a, __b); +} + +static vector double __ATTRS_o_ai vec_cpsgn(vector double __a, + vector double __b) { + return __builtin_vsx_xvcpsgndp(__a, __b); +} #endif /* vec_ctf */ @@ -1790,14 +1838,58 @@ vec_vctuxs(vector float __a, int __b) { } /* vec_div */ + +/* Integer vector divides (vectors are scalarized, elements divided + and the vectors reassembled). 
+*/ +static vector signed char __ATTRS_o_ai vec_div(vector signed char __a, + vector signed char __b) { + return __a / __b; +} + +static vector unsigned char __ATTRS_o_ai vec_div(vector unsigned char __a, + vector unsigned char __b) { + return __a / __b; +} + +static vector signed short __ATTRS_o_ai vec_div(vector signed short __a, + vector signed short __b) { + return __a / __b; +} + +static vector unsigned short __ATTRS_o_ai vec_div(vector unsigned short __a, + vector unsigned short __b) { + return __a / __b; +} + +static vector signed int __ATTRS_o_ai vec_div(vector signed int __a, + vector signed int __b) { + return __a / __b; +} + +static vector unsigned int __ATTRS_o_ai vec_div(vector unsigned int __a, + vector unsigned int __b) { + return __a / __b; +} + #ifdef __VSX__ +static vector signed long long __ATTRS_o_ai +vec_div(vector signed long long __a, vector signed long long __b) { + return __a / __b; +} + +static vector unsigned long long __ATTRS_o_ai +vec_div(vector unsigned long long __a, vector unsigned long long __b) { + return __a / __b; +} + static vector float __ATTRS_o_ai vec_div(vector float __a, vector float __b) { - return __builtin_vsx_xvdivsp(__a, __b); + return __a / __b; } static vector double __ATTRS_o_ai vec_div(vector double __a, vector double __b) { - return __builtin_vsx_xvdivdp(__a, __b); + return __a / __b; } #endif @@ -1841,6 +1933,189 @@ vec_dstt(const void *__a, int __b, int __c) { __builtin_altivec_dstt(__a, __b, __c); } +/* vec_eqv */ + +#ifdef __POWER8_VECTOR__ +static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a, + vector signed char __b) { + return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed char __ATTRS_o_ai vec_eqv(vector bool char __a, + vector signed char __b) { + return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a, + vector bool char __b) { + return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a, + vector unsigned char __b) { + return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned char __ATTRS_o_ai vec_eqv(vector bool char __a, + vector unsigned char __b) { + return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a, + vector bool char __b) { + return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a, + vector signed short __b) { + return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed short __ATTRS_o_ai vec_eqv(vector bool short __a, + vector signed short __b) { + return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a, + vector bool short __b) { + return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a, + vector unsigned short __b) { + return 
(vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned short __ATTRS_o_ai vec_eqv(vector bool short __a, + vector unsigned short __b) { + return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a, + vector bool short __b) { + return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a, + vector signed int __b) { + return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed int __ATTRS_o_ai vec_eqv(vector bool int __a, + vector signed int __b) { + return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a, + vector bool int __b) { + return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a, + vector unsigned int __b) { + return __builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned int __ATTRS_o_ai vec_eqv(vector bool int __a, + vector unsigned int __b) { + return __builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a, + vector bool int __b) { + return __builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector signed long long __ATTRS_o_ai +vec_eqv(vector signed long long __a, vector signed long long __b) { + return (vector signed long long) + __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); +} + +static vector signed long long __ATTRS_o_ai +vec_eqv(vector bool long long __a, vector signed long long __b) { + return (vector signed long long) + __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); +} + +static vector signed long long __ATTRS_o_ai +vec_eqv(vector signed long long __a, vector bool long long __b) { + return (vector signed long long) + __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_eqv(vector unsigned long long __a, vector unsigned long long __b) { + return (vector unsigned long long) + __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_eqv(vector bool long long __a, vector unsigned long long __b) { + return (vector unsigned long long) + __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_eqv(vector unsigned long long __a, vector bool long long __b) { + return (vector unsigned long long) + __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); +} + +static vector float __ATTRS_o_ai vec_eqv(vector float __a, vector float __b) { + return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector float __ATTRS_o_ai vec_eqv(vector bool int __a, + vector float __b) { + return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector float __ATTRS_o_ai vec_eqv(vector float __a, + 
vector bool int __b) { + return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector double __ATTRS_o_ai vec_eqv(vector double __a, + vector double __b) { + return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector double __ATTRS_o_ai vec_eqv(vector bool long long __a, + vector double __b) { + return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} + +static vector double __ATTRS_o_ai vec_eqv(vector double __a, + vector bool long long __b) { + return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a, + (vector unsigned int)__b); +} +#endif + /* vec_expte */ static vector float __attribute__((__always_inline__)) @@ -1857,10 +2132,19 @@ vec_vexptefp(vector float __a) { /* vec_floor */ -static vector float __attribute__((__always_inline__)) -vec_floor(vector float __a) { +static vector float __ATTRS_o_ai vec_floor(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrspim(__a); +#else return __builtin_altivec_vrfim(__a); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_floor(vector double __a) { + return __builtin_vsx_xvrdpim(__a); } +#endif /* vec_vrfim */ @@ -2532,10 +2816,21 @@ static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) { /* vec_madd */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_madd(vector float __a, vector float __b, vector float __c) { +#ifdef __VSX__ + return __builtin_vsx_xvmaddasp(__a, __b, __c); +#else return __builtin_altivec_vmaddfp(__a, __b, __c); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_madd(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvmaddadp(__a, __b, __c); } +#endif /* vec_vmaddfp */ @@ -2559,6 +2854,20 @@ vec_vmhaddshs(vector signed short __a, vector signed short __b, return __builtin_altivec_vmhaddshs(__a, __b, __c); } +/* vec_msub */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai +vec_msub(vector float __a, vector float __b, vector float __c) { + return __builtin_vsx_xvmsubasp(__a, __b, __c); +} + +static vector double __ATTRS_o_ai +vec_msub(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvmsubadp(__a, __b, __c); +} +#endif + /* vec_max */ static vector signed char __ATTRS_o_ai vec_max(vector signed char __a, @@ -2893,6 +3202,86 @@ static vector float __ATTRS_o_ai vec_mergeh(vector float __a, 0x14, 0x15, 0x16, 0x17)); } +#ifdef __VSX__ +static vector signed long long __ATTRS_o_ai +vec_mergeh(vector signed long long __a, vector signed long long __b) { + return vec_perm(__a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} + +static vector signed long long __ATTRS_o_ai +vec_mergeh(vector signed long long __a, vector bool long long __b) { + return vec_perm(__a, (vector signed long long)__b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} + +static vector signed long long __ATTRS_o_ai +vec_mergeh(vector bool long long __a, vector signed long long __b) { + return vec_perm((vector signed long long)__a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} + +static vector unsigned long long __ATTRS_o_ai +vec_mergeh(vector unsigned long long 
__a, vector unsigned long long __b) { + return vec_perm(__a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} + +static vector unsigned long long __ATTRS_o_ai +vec_mergeh(vector unsigned long long __a, vector bool long long __b) { + return vec_perm(__a, (vector unsigned long long)__b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} + +static vector unsigned long long __ATTRS_o_ai +vec_mergeh(vector bool long long __a, vector unsigned long long __b) { + return vec_perm((vector unsigned long long)__a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} +static vector double __ATTRS_o_ai vec_mergeh(vector double __a, + vector double __b) { + return vec_perm(__a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} +static vector double __ATTRS_o_ai vec_mergeh(vector double __a, + vector bool long long __b) { + return vec_perm(__a, (vector double)__b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} +static vector double __ATTRS_o_ai vec_mergeh(vector bool long long __a, + vector double __b) { + return vec_perm((vector double)__a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17)); +} +#endif + /* vec_vmrghb */ #define __builtin_altivec_vmrghb vec_vmrghb @@ -3081,6 +3470,81 @@ static vector float __ATTRS_o_ai vec_mergel(vector float __a, 0x1C, 0x1D, 0x1E, 0x1F)); } +#ifdef __VSX__ +static vector signed long long __ATTRS_o_ai +vec_mergel(vector signed long long __a, vector signed long long __b) { + return vec_perm(__a, __b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector signed long long __ATTRS_o_ai +vec_mergel(vector signed long long __a, vector bool long long __b) { + return vec_perm(__a, (vector signed long long)__b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector signed long long __ATTRS_o_ai +vec_mergel(vector bool long long __a, vector signed long long __b) { + return vec_perm((vector signed long long)__a, __b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector unsigned long long __ATTRS_o_ai +vec_mergel(vector unsigned long long __a, vector unsigned long long __b) { + return vec_perm(__a, __b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector unsigned long long __ATTRS_o_ai +vec_mergel(vector unsigned long long __a, vector bool long long __b) { + return vec_perm(__a, (vector unsigned long long)__b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector unsigned long long __ATTRS_o_ai +vec_mergel(vector bool long long __a, vector unsigned long long __b) { + return vec_perm((vector unsigned long long)__a, __b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 
0x1D, 0x1E, 0x1F)); +} +static vector double __ATTRS_o_ai +vec_mergel(vector double __a, vector double __b) { + return vec_perm(__a, __b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector double __ATTRS_o_ai +vec_mergel(vector double __a, vector bool long long __b) { + return vec_perm(__a, (vector double)__b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +static vector double __ATTRS_o_ai +vec_mergel(vector bool long long __a, vector double __b) { + return vec_perm((vector double)__a, __b, + (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x18, 0X19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F)); +} +#endif + /* vec_vmrglb */ #define __builtin_altivec_vmrglb vec_vmrglb @@ -3677,6 +4141,65 @@ static void __ATTRS_o_ai vec_mtvscr(vector float __a) { __builtin_altivec_mtvscr((vector int)__a); } +/* vec_mul */ + +/* Integer vector multiplication will involve multiplication of the odd/even + elements separately, then truncating the results and moving to the + result vector. +*/ +static vector signed char __ATTRS_o_ai vec_mul(vector signed char __a, + vector signed char __b) { + return __a * __b; +} + +static vector unsigned char __ATTRS_o_ai vec_mul(vector unsigned char __a, + vector unsigned char __b) { + return __a * __b; +} + +static vector signed short __ATTRS_o_ai vec_mul(vector signed short __a, + vector signed short __b) { + return __a * __b; +} + +static vector unsigned short __ATTRS_o_ai vec_mul(vector unsigned short __a, + vector unsigned short __b) { + return __a * __b; +} + +static vector signed int __ATTRS_o_ai vec_mul(vector signed int __a, + vector signed int __b) { + return __a * __b; +} + +static vector unsigned int __ATTRS_o_ai vec_mul(vector unsigned int __a, + vector unsigned int __b) { + return __a * __b; +} + +#ifdef __VSX__ +static vector signed long long __ATTRS_o_ai +vec_mul(vector signed long long __a, vector signed long long __b) { + return __a * __b; +} + +static vector unsigned long long __ATTRS_o_ai +vec_mul(vector unsigned long long __a, vector unsigned long long __b) { + return __a * __b; +} +#endif + +static vector float __ATTRS_o_ai vec_mul(vector float __a, vector float __b) { + return __a * __b; +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_mul(vector double __a, vector double __b) { + return __a * __b; +} +#endif + /* The vmulos* and vmules* instructions have a big endian bias, so we must reverse the meaning of "even" and "odd" for little endian. 
*/ @@ -3882,12 +4405,165 @@ vec_vmulouh(vector unsigned short __a, vector unsigned short __b) { #endif } +/* vec_nand */ + +#ifdef __POWER8_VECTOR__ +static vector signed char __ATTRS_o_ai vec_nand(vector signed char __a, + vector signed char __b) { + return ~(__a & __b); +} + +static vector signed char __ATTRS_o_ai vec_nand(vector signed char __a, + vector bool char __b) { + return ~(__a & __b); +} + +static vector signed char __ATTRS_o_ai vec_nand(vector bool char __a, + vector signed char __b) { + return ~(__a & __b); +} + +static vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a, + vector unsigned char __b) { + return ~(__a & __b); +} + +static vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a, + vector bool char __b) { + return ~(__a & __b); + +} + +static vector unsigned char __ATTRS_o_ai vec_nand(vector bool char __a, + vector unsigned char __b) { + return ~(__a & __b); +} + +static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a, + vector signed short __b) { + return ~(__a & __b); +} + +static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a, + vector bool short __b) { + return ~(__a & __b); +} + +static vector signed short __ATTRS_o_ai vec_nand(vector bool short __a, + vector signed short __b) { + return ~(__a & __b); +} + +static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a, + vector unsigned short __b) { + return ~(__a & __b); +} + +static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a, + vector bool short __b) { + return ~(__a & __b); + +} + +static vector unsigned short __ATTRS_o_ai vec_nand(vector bool short __a, + vector unsigned short __b) { + return ~(__a & __b); + +} + +static vector signed int __ATTRS_o_ai vec_nand(vector signed int __a, + vector signed int __b) { + return ~(__a & __b); +} + +static vector signed int __ATTRS_o_ai vec_nand(vector signed int __a, + vector bool int __b) { + return ~(__a & __b); +} + +static vector signed int __ATTRS_o_ai vec_nand(vector bool int __a, + vector signed int __b) { + return ~(__a & __b); +} + +static vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a, + vector unsigned int __b) { + return ~(__a & __b); +} + +static vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a, + vector bool int __b) { + return ~(__a & __b); +} + +static vector unsigned int __ATTRS_o_ai vec_nand(vector bool int __a, + vector unsigned int __b) { + return ~(__a & __b); +} + +static vector signed long long __ATTRS_o_ai +vec_nand(vector signed long long __a, vector signed long long __b) { + return ~(__a & __b); +} + +static vector signed long long __ATTRS_o_ai +vec_nand(vector signed long long __a, vector bool long long __b) { + return ~(__a & __b); +} + +static vector signed long long __ATTRS_o_ai +vec_nand(vector bool long long __a, vector signed long long __b) { + return ~(__a & __b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_nand(vector unsigned long long __a, vector unsigned long long __b) { + return ~(__a & __b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_nand(vector unsigned long long __a, vector bool long long __b) { + return ~(__a & __b); +} + +static vector unsigned long long __ATTRS_o_ai +vec_nand(vector bool long long __a, vector unsigned long long __b) { + return ~(__a & __b); +} + +#endif + +/* vec_nmadd */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai +vec_nmadd(vector float __a, vector float __b, vector float __c) { + return __builtin_vsx_xvnmaddasp(__a, __b, __c); 
+} + +static vector double __ATTRS_o_ai +vec_nmadd(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvnmaddadp(__a, __b, __c); +} +#endif + /* vec_nmsub */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_nmsub(vector float __a, vector float __b, vector float __c) { +#ifdef __VSX__ + return __builtin_vsx_xvnmsubasp(__a, __b, __c); +#else return __builtin_altivec_vnmsubfp(__a, __b, __c); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_nmsub(vector double __a, vector double __b, vector double __c) { + return __builtin_vsx_xvnmsubadp(__a, __b, __c); } +#endif /* vec_vnmsubfp */ @@ -3949,6 +4625,15 @@ static vector float __ATTRS_o_ai vec_nor(vector float __a, vector float __b) { return (vector float)__res; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_nor(vector double __a, vector double __b) { + vector unsigned long long __res = + ~((vector unsigned long long)__a | (vector unsigned long long)__b); + return (vector double)__res; +} +#endif + /* vec_vnor */ static vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a, @@ -4141,6 +4826,22 @@ static vector float __ATTRS_o_ai vec_or(vector float __a, vector bool int __b) { } #ifdef __VSX__ +static vector double __ATTRS_o_ai vec_or(vector bool long long __a, + vector double __b) { + return (vector unsigned long long)__a | (vector unsigned long long)__b; +} + +static vector double __ATTRS_o_ai vec_or(vector double __a, + vector bool long long __b) { + return (vector unsigned long long)__a | (vector unsigned long long)__b; +} + +static vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) { + vector unsigned long long __res = + (vector unsigned long long)__a | (vector unsigned long long)__b; + return (vector double)__res; +} + static vector signed long long __ATTRS_o_ai vec_or(vector signed long long __a, vector signed long long __b) { return __a | __b; @@ -4177,6 +4878,128 @@ static vector bool long long __ATTRS_o_ai vec_or(vector bool long long __a, } #endif +#ifdef __POWER8_VECTOR__ +static vector signed char __ATTRS_o_ai vec_orc(vector signed char __a, + vector signed char __b) { + return __a | ~__b; +} + +static vector signed char __ATTRS_o_ai vec_orc(vector signed char __a, + vector bool char __b) { + return __a | ~__b; +} + +static vector signed char __ATTRS_o_ai vec_orc(vector bool char __a, + vector signed char __b) { + return __a | ~__b; +} + +static vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a, + vector unsigned char __b) { + return __a | ~__b; +} + +static vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a, + vector bool char __b) { + return __a | ~__b; +} + +static vector unsigned char __ATTRS_o_ai vec_orc(vector bool char __a, + vector unsigned char __b) { + return __a | ~__b; +} + +static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a, + vector signed short __b) { + return __a | ~__b; +} + +static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a, + vector bool short __b) { + return __a | ~__b; +} + +static vector signed short __ATTRS_o_ai vec_orc(vector bool short __a, + vector signed short __b) { + return __a | ~__b; +} + +static vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a, + vector unsigned short __b) { + return __a | ~__b; +} + +static vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a, + vector bool short __b) { + return __a | ~__b; +} + +static vector unsigned short __ATTRS_o_ai 
+vec_orc(vector bool short __a, vector unsigned short __b) { + return __a | ~__b; +} + +static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a, + vector signed int __b) { + return __a | ~__b; +} + +static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a, + vector bool int __b) { + return __a | ~__b; +} + +static vector signed int __ATTRS_o_ai vec_orc(vector bool int __a, + vector signed int __b) { + return __a | ~__b; +} + +static vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a, + vector unsigned int __b) { + return __a | ~__b; +} + +static vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a, + vector bool int __b) { + return __a | ~__b; +} + +static vector unsigned int __ATTRS_o_ai vec_orc(vector bool int __a, + vector unsigned int __b) { + return __a | ~__b; +} + +static vector signed long long __ATTRS_o_ai +vec_orc(vector signed long long __a, vector signed long long __b) { + return __a | ~__b; +} + +static vector signed long long __ATTRS_o_ai vec_orc(vector signed long long __a, + vector bool long long __b) { + return __a | ~__b; +} + +static vector signed long long __ATTRS_o_ai +vec_orc(vector bool long long __a, vector signed long long __b) { + return __a | ~__b; +} + +static vector unsigned long long __ATTRS_o_ai +vec_orc(vector unsigned long long __a, vector unsigned long long __b) { + return __a | ~__b; +} + +static vector unsigned long long __ATTRS_o_ai +vec_orc(vector unsigned long long __a, vector bool long long __b) { + return __a | ~__b; +} + +static vector unsigned long long __ATTRS_o_ai +vec_orc(vector bool long long __a, vector unsigned long long __b) { + return __a | ~__b; +} +#endif + /* vec_vor */ static vector signed char __ATTRS_o_ai vec_vor(vector signed char __a, @@ -4431,6 +5254,53 @@ static vector bool short __ATTRS_o_ai vec_pack(vector bool int __a, #endif } +#ifdef __VSX__ +static vector signed int __ATTRS_o_ai vec_pack(vector signed long long __a, + vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return (vector signed int)vec_perm( + __a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); +#else + return (vector signed int)vec_perm( + __a, __b, + (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); +#endif +} +static vector unsigned int __ATTRS_o_ai +vec_pack(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return (vector unsigned int)vec_perm( + __a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); +#else + return (vector unsigned int)vec_perm( + __a, __b, + (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); +#endif +} + +static vector bool int __ATTRS_o_ai vec_pack(vector bool long long __a, + vector bool long long __b) { +#ifdef __LITTLE_ENDIAN__ + return (vector bool int)vec_perm( + __a, __b, + (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); +#else + return (vector bool int)vec_perm( + __a, __b, + (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); +#endif +} + +#endif + /* vec_vpkuhum */ #define __builtin_altivec_vpkuhum vec_vpkuhum @@ -4895,17 +5765,18 @@ static vector bool char __ATTRS_o_ai vec_perm(vector 
bool char __a, #endif } -static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b, +static vector short __ATTRS_o_ai vec_perm(vector signed short __a, + vector signed short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); - return (vector short)__builtin_altivec_vperm_4si((vector int)__b, - (vector int)__a, __d); + return (vector signed short)__builtin_altivec_vperm_4si((vector int)__b, + (vector int)__a, __d); #else - return (vector short)__builtin_altivec_vperm_4si((vector int)__a, - (vector int)__b, __c); + return (vector signed short)__builtin_altivec_vperm_4si((vector int)__a, + (vector int)__b, __c); #endif } @@ -4953,15 +5824,16 @@ static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b, #endif } -static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b, +static vector int __ATTRS_o_ai vec_perm(vector signed int __a, + vector signed int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); - return (vector int)__builtin_altivec_vperm_4si(__b, __a, __d); + return (vector signed int)__builtin_altivec_vperm_4si(__b, __a, __d); #else - return (vector int)__builtin_altivec_vperm_4si(__a, __b, __c); + return (vector signed int)__builtin_altivec_vperm_4si(__a, __b, __c); #endif } @@ -5010,16 +5882,18 @@ static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b, } #ifdef __VSX__ -static vector long long __ATTRS_o_ai vec_perm(vector long long __a, - vector long long __b, +static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a, + vector signed long long __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); - return (vector long long)__builtin_altivec_vperm_4si(__b, __a, __d); + return (vector signed long long)__builtin_altivec_vperm_4si( + (vector int)__b, (vector int)__a, __d); #else - return (vector long long)__builtin_altivec_vperm_4si(__a, __b, __c); + return (vector signed long long)__builtin_altivec_vperm_4si( + (vector int)__a, (vector int)__b, __c); #endif } @@ -5038,6 +5912,21 @@ vec_perm(vector unsigned long long __a, vector unsigned long long __b, #endif } +static vector bool long long __ATTRS_o_ai +vec_perm(vector bool long long __a, vector bool long long __b, + vector unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255}; + __d = vec_xor(__c, __d); + return (vector bool long long)__builtin_altivec_vperm_4si( + (vector int)__b, (vector int)__a, __d); +#else + return (vector bool long long)__builtin_altivec_vperm_4si( + (vector int)__a, (vector int)__b, __c); +#endif +} + static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -5139,10 +6028,20 @@ static vector double __ATTRS_o_ai vec_vperm(vector double __a, /* vec_re */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_re(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvresp(__a); +#else return __builtin_altivec_vrefp(__a); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai 
vec_re(vector double __a) { + return __builtin_vsx_xvredp(__a); } +#endif /* vec_vrefp */ @@ -5232,10 +6131,41 @@ static vector unsigned int __ATTRS_o_ai vec_vrlw(vector unsigned int __a, /* vec_round */ -static vector float __attribute__((__always_inline__)) -vec_round(vector float __a) { +static vector float __ATTRS_o_ai vec_round(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrspi(__a); +#else return __builtin_altivec_vrfin(__a); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_round(vector double __a) { + return __builtin_vsx_xvrdpi(__a); +} + +/* vec_rint */ + +static vector float __ATTRS_o_ai +vec_rint(vector float __a) { + return __builtin_vsx_xvrspic(__a); +} + +static vector double __ATTRS_o_ai +vec_rint(vector double __a) { + return __builtin_vsx_xvrdpic(__a); +} + +/* vec_nearbyint */ + +static vector float __ATTRS_o_ai vec_nearbyint(vector float __a) { + return __builtin_vsx_xvrspi(__a); +} + +static vector double __ATTRS_o_ai vec_nearbyint(vector double __a) { + return __builtin_vsx_xvrdpi(__a); } +#endif /* vec_vrfin */ @@ -5244,12 +6174,34 @@ vec_vrfin(vector float __a) { return __builtin_altivec_vrfin(__a); } +/* vec_sqrt */ + +#ifdef __VSX__ +static vector float __ATTRS_o_ai vec_sqrt(vector float __a) { + return __builtin_vsx_xvsqrtsp(__a); +} + +static vector double __ATTRS_o_ai vec_sqrt(vector double __a) { + return __builtin_vsx_xvsqrtdp(__a); +} +#endif + /* vec_rsqrte */ -static __vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_rsqrte(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrsqrtesp(__a); +#else return __builtin_altivec_vrsqrtefp(__a); +#endif +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_rsqrte(vector double __a) { + return __builtin_vsx_xvrsqrtedp(__a); } +#endif /* vec_vrsqrtefp */ @@ -5381,6 +6333,22 @@ static vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b, return (vector float)__res; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b, + vector bool long long __c) { + vector long long __res = ((vector long long)__a & ~(vector long long)__c) | + ((vector long long)__b & (vector long long)__c); + return (vector double)__res; +} + +static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b, + vector unsigned long long __c) { + vector long long __res = ((vector long long)__a & ~(vector long long)__c) | + ((vector long long)__b & (vector long long)__c); + return (vector double)__res; +} +#endif + /* vec_vsel */ static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a, @@ -5593,78 +6561,121 @@ static vector unsigned int __ATTRS_o_ai vec_vslw(vector unsigned int __a, static vector signed char __ATTRS_o_ai vec_sld(vector signed char __a, vector signed char __b, - unsigned char __c) { + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a, vector unsigned char __b, - unsigned char __c) { + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, 
__c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } -static vector short __ATTRS_o_ai vec_sld(vector short __a, vector short __b, - unsigned char __c) { +static vector bool char __ATTRS_o_ai vec_sld(vector bool char __a, + vector bool char __b, + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); +} + +static vector signed short __ATTRS_o_ai vec_sld(vector signed short __a, + vector signed short __b, + unsigned const int __c) { + unsigned char __d = __c & 0x0F; + return vec_perm( + __a, __b, + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a, vector unsigned short __b, - unsigned char __c) { + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); +} + +static vector bool short __ATTRS_o_ai vec_sld(vector bool short __a, + vector bool short __b, + unsigned const int __c) { + unsigned char __d = __c & 0x0F; + return vec_perm( + __a, __b, + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b, - unsigned char __c) { + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } -static vector int __ATTRS_o_ai vec_sld(vector int __a, vector int __b, - unsigned char __c) { +static vector signed int __ATTRS_o_ai vec_sld(vector signed int __a, + vector signed int __b, + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int __a, vector unsigned int __b, - 
unsigned char __c) { + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); +} + +static vector bool int __ATTRS_o_ai vec_sld(vector bool int __a, + vector bool int __b, + unsigned const int __c) { + unsigned char __d = __c & 0x0F; + return vec_perm( + __a, __b, + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } static vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b, - unsigned char __c) { + unsigned const int __c) { + unsigned char __d = __c & 0x0F; return vec_perm( __a, __b, - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5, - __c + 6, __c + 7, __c + 8, __c + 9, __c + 10, - __c + 11, __c + 12, __c + 13, __c + 14, __c + 15)); + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, + __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, + __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); } /* vec_vsldoi */ @@ -6273,91 +7284,131 @@ static vector float __ATTRS_o_ai vec_vslo(vector float __a, /* vec_splat */ static vector signed char __ATTRS_o_ai vec_splat(vector signed char __a, - unsigned char __b) { - return vec_perm(__a, __a, (vector unsigned char)(__b)); + unsigned const int __b) { + return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F)); } static vector unsigned char __ATTRS_o_ai vec_splat(vector unsigned char __a, - unsigned char __b) { - return vec_perm(__a, __a, (vector unsigned char)(__b)); + unsigned const int __b) { + return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F)); } static vector bool char __ATTRS_o_ai vec_splat(vector bool char __a, - unsigned char __b) { - return vec_perm(__a, __a, (vector unsigned char)(__b)); + unsigned const int __b) { + return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F)); } -static vector short __ATTRS_o_ai vec_splat(vector short __a, - unsigned char __b) { - __b *= 2; - unsigned char b1 = __b + 1; +static vector signed short __ATTRS_o_ai vec_splat(vector signed short __a, + unsigned const int __b) { + unsigned char b0 = (__b & 0x07) * 2; + unsigned char b1 = b0 + 1; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, - __b, b1, __b, b1, __b, b1, __b, b1)); + (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, + b0, b1, b0, b1, b0, b1, b0, b1)); } static vector unsigned short __ATTRS_o_ai vec_splat(vector unsigned short __a, - unsigned char __b) { - __b *= 2; - unsigned char b1 = __b + 1; + unsigned const int __b) { + unsigned char b0 = (__b & 0x07) * 2; + unsigned char b1 = b0 + 1; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, - __b, b1, __b, b1, __b, b1, __b, b1)); + (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, + b0, b1, b0, b1, b0, b1, b0, b1)); } static vector bool short __ATTRS_o_ai vec_splat(vector bool short __a, - unsigned char __b) { - __b *= 2; - unsigned char b1 = __b + 1; + unsigned const int __b) { + unsigned char b0 = (__b & 0x07) * 2; + unsigned char b1 = b0 + 1; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, - __b, b1, 
__b, b1, __b, b1, __b, b1)); + (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, + b0, b1, b0, b1, b0, b1, b0, b1)); } static vector pixel __ATTRS_o_ai vec_splat(vector pixel __a, - unsigned char __b) { - __b *= 2; - unsigned char b1 = __b + 1; + unsigned const int __b) { + unsigned char b0 = (__b & 0x07) * 2; + unsigned char b1 = b0 + 1; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, - __b, b1, __b, b1, __b, b1, __b, b1)); + (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, + b0, b1, b0, b1, b0, b1, b0, b1)); } -static vector int __ATTRS_o_ai vec_splat(vector int __a, unsigned char __b) { - __b *= 4; - unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; +static vector signed int __ATTRS_o_ai vec_splat(vector signed int __a, + unsigned const int __b) { + unsigned char b0 = (__b & 0x03) * 4; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, - b1, b2, b3, __b, b1, b2, b3)); + (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, + b1, b2, b3, b0, b1, b2, b3)); } static vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int __a, - unsigned char __b) { - __b *= 4; - unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; + unsigned const int __b) { + unsigned char b0 = (__b & 0x03) * 4; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, - b1, b2, b3, __b, b1, b2, b3)); + (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, + b1, b2, b3, b0, b1, b2, b3)); } static vector bool int __ATTRS_o_ai vec_splat(vector bool int __a, - unsigned char __b) { - __b *= 4; - unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; + unsigned const int __b) { + unsigned char b0 = (__b & 0x03) * 4; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, - b1, b2, b3, __b, b1, b2, b3)); + (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, + b1, b2, b3, b0, b1, b2, b3)); } static vector float __ATTRS_o_ai vec_splat(vector float __a, - unsigned char __b) { - __b *= 4; - unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; + unsigned const int __b) { + unsigned char b0 = (__b & 0x03) * 4; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, - (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, - b1, b2, b3, __b, b1, b2, b3)); + (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, + b1, b2, b3, b0, b1, b2, b3)); } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_splat(vector double __a, + unsigned const int __b) { + unsigned char b0 = (__b & 0x01) * 8; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, + b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; + return vec_perm(__a, __a, + (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, + b0, b1, b2, b3, b4, b5, b6, b7)); +} +static vector bool long long __ATTRS_o_ai vec_splat(vector bool long long __a, + unsigned const int __b) { + unsigned char b0 = (__b & 0x01) * 8; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, + b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; + return vec_perm(__a, __a, + (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, + b0, b1, b2, b3, b4, b5, b6, b7)); +} +static vector signed long long __ATTRS_o_ai +vec_splat(vector signed long long __a, unsigned const int __b) { + unsigned char b0 = (__b & 0x01) * 8; + unsigned 
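
/*
 * Editorial sketch: the vec_splat element index is now reduced modulo the
 * element count (e.g. __b & 0x03 for four 32-bit lanes), so an out-of-range
 * index wraps instead of producing an out-of-bounds permute.  Assumes
 * -maltivec; names are illustrative only.
 */
#include <altivec.h>

static vector float splat_demo(vector float v) {
  /* Replicate element 2 into all four lanes; an index of 6 would select
   * the same element, since 6 & 0x03 == 2. */
  return vec_splat(v, 2);
}
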
char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, + b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; + return vec_perm(__a, __a, + (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, + b0, b1, b2, b3, b4, b5, b6, b7)); +} +static vector unsigned long long __ATTRS_o_ai +vec_splat(vector unsigned long long __a, unsigned const int __b) { + unsigned char b0 = (__b & 0x01) * 8; + unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, + b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; + return vec_perm(__a, __a, + (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, + b0, b1, b2, b3, b4, b5, b6, b7)); +} +#endif + /* vec_vspltb */ #define __builtin_altivec_vspltb vec_vspltb @@ -6529,7 +7580,8 @@ static vector unsigned int __ATTRS_o_ai vec_splat_u32(signed char __a) { static vector signed char __ATTRS_o_ai vec_sr(vector signed char __a, vector unsigned char __b) { - return __a >> (vector signed char)__b; + vector unsigned char __res = (vector unsigned char)__a >> __b; + return (vector signed char)__res; } static vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, @@ -6537,9 +7589,10 @@ static vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, return __a >> __b; } -static vector short __ATTRS_o_ai vec_sr(vector short __a, +static vector signed short __ATTRS_o_ai vec_sr(vector signed short __a, vector unsigned short __b) { - return __a >> (vector short)__b; + vector unsigned short __res = (vector unsigned short)__a >> __b; + return (vector signed short)__res; } static vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a, @@ -6547,8 +7600,10 @@ static vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a, return __a >> __b; } -static vector int __ATTRS_o_ai vec_sr(vector int __a, vector unsigned int __b) { - return __a >> (vector int)__b; +static vector signed int __ATTRS_o_ai vec_sr(vector signed int __a, + vector unsigned int __b) { + vector unsigned int __res = (vector unsigned int)__a >> __b; + return (vector signed int)__res; } static vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a, @@ -6559,7 +7614,8 @@ static vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a, #ifdef __POWER8_VECTOR__ static vector signed long long __ATTRS_o_ai vec_sr(vector signed long long __a, vector unsigned long long __b) { - return __a >> (vector long long)__b; + vector unsigned long long __res = (vector unsigned long long)__a >> __b; + return (vector signed long long)__res; } static vector unsigned long long __ATTRS_o_ai @@ -7960,6 +9016,13 @@ static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) { return __a - __b; } +#ifdef __VSX__ +static vector double __ATTRS_o_ai +vec_sub(vector double __a, vector double __b) { + return __a - __b; +} +#endif + /* vec_vsububm */ #define __builtin_altivec_vsububm vec_vsububm @@ -8451,11 +9514,21 @@ vec_vsumsws(vector signed int __a, vector signed int __b) { /* vec_trunc */ -static vector float __attribute__((__always_inline__)) +static vector float __ATTRS_o_ai vec_trunc(vector float __a) { +#ifdef __VSX__ + return __builtin_vsx_xvrspiz(__a); +#else return __builtin_altivec_vrfiz(__a); +#endif } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_trunc(vector double __a) { + return __builtin_vsx_xvrdpiz(__a); +} +#endif + /* vec_vrfiz */ static vector float __attribute__((__always_inline__)) @@ -8945,6 +10018,24 @@ static vector bool long long __ATTRS_o_ai vec_xor(vector bool long long __a, vector bool long long __b) { return __a ^ __b; } + +static vector double 
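
/*
 * Editorial sketch: the vec_sr overloads above now shift in the unsigned
 * domain and cast back, i.e. the shift is logical (zero-filling) even for
 * signed element types, matching the PowerPC vsr* instructions.  Assumes
 * -maltivec; names are illustrative only.
 */
#include <altivec.h>

static vector signed int sr_demo(vector signed int a) {
  vector unsigned int one = vec_splat_u32(1);
  /* An element holding -2 (0xFFFFFFFE) becomes 0x7FFFFFFF, not -1. */
  return vec_sr(a, one);
}
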
__ATTRS_o_ai +vec_xor(vector double __a, vector double __b) { + return (vector double)((vector unsigned long long)__a ^ + (vector unsigned long long)__b); +} + +static vector double __ATTRS_o_ai +vec_xor(vector double __a, vector bool long long __b) { + return (vector double)((vector unsigned long long)__a ^ + (vector unsigned long long) __b); +} + +static vector double __ATTRS_o_ai +vec_xor(vector bool long long __a, vector double __b) { + return (vector double)((vector unsigned long long)__a ^ + (vector unsigned long long)__b); +} #endif /* vec_vxor */ diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h index 59849e4..eb198a5 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h @@ -777,6 +777,1051 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { _mm_setzero_si128 (), (__mmask8) __U); } + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_pabsw256_mask 
((__v16hi) __A, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) _mm_setzero_si128 (), __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packs_epi32 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) __W, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) __W, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) __W, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packus_epi32 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A, + (__v4si) __B, + (__v8hi) __W, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A, + (__v8si) __B, + (__v16hi) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v16qi) __W, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_packus_epi16 
(__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v32qi) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_adds_epu16 (__m128i __W, 
__mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B) +{ 
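
/*
 * Editorial sketch of the masking convention shared by the intrinsics in
 * this hunk: _mm_mask_* merges, keeping lanes of the passthrough operand
 * where the mask bit is clear, while _mm_maskz_* zeroes those lanes.
 * Assumes an x86 target compiled with -mavx512vl -mavx512bw; names are
 * illustrative only.
 */
#include <immintrin.h>

static __m128i mask_vs_maskz_demo(__m128i w, __m128i a, __m128i b) {
  __mmask16 m = 0x00FF;                            /* low 8 byte lanes active */
  __m128i merged = _mm_mask_adds_epi8(w, m, a, b); /* inactive lanes keep w */
  __m128i zeroed = _mm_maskz_adds_epi8(m, a, b);   /* inactive lanes are 0  */
  return _mm_or_si128(merged, zeroed);             /* combined only to use both */
}
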
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_pminsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pminuw256_mask 
((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) 
__builtin_ia32_psubusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) __W, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__v16qi) _mm_setzero_si128 (), + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) __W, + (__mmask32) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__v32qi) _mm256_setzero_si256 (), + (__mmask32) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A, + (__v8hi) __B, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) __W, + (__mmask16) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A, + (__v16hi) __B, + (__v16hi) _mm256_setzero_si256 (), + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U, + __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A, + (__v8hi) __I /* idx */ , + (__v8hi) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I, + __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A, + (__v16hi) __I /* idx */ , + (__v16hi) __B, + (__mmask16) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I, + __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, + __m128i __B) +{ + return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I/* idx */, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS 
+_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
+                                __m256i __I, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */,
+                                                        (__v16hi) __A,
+                                                        (__v16hi) __B,
+                                                        (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
+                                 __m256i __I, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I/* idx */,
+                                                         (__v16hi) __A,
+                                                         (__v16hi) __B,
+                                                         (__mmask16) __U);
+}
+
 #define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
   (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), \
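
/*
 * Editorial sketch: _mm_permutex2var_epi16 (added above) builds each result
 * lane from the 16-entry table formed by the lanes of its first and third
 * operands; bits 2:0 of each index select the lane and bit 3 selects the
 * source register.  Assumes -mavx512vl -mavx512bw; names are illustrative
 * only.
 */
#include <immintrin.h>

static __m128i permutex2var_demo(__m128i a, __m128i b) {
  /* _mm_set_epi16 lists lanes from 7 down to 0: indices 0..3 take the low
   * half of a, indices 12..15 take the high half of b. */
  __m128i idx = _mm_set_epi16(15, 14, 13, 12, 3, 2, 1, 0);
  return _mm_permutex2var_epi16(a, idx, b);
}
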