Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers')
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/Intrin.h            |  154
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/altivec.h           | 1319
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h  | 1045
3 files changed, 2323 insertions, 195 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/Intrin.h b/contrib/llvm/tools/clang/lib/Headers/Intrin.h
index 7ba311e..24b3eae 100644
--- a/contrib/llvm/tools/clang/lib/Headers/Intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/Intrin.h
@@ -513,47 +513,40 @@ _BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
return 1;
}
static __inline__ unsigned short __DEFAULT_FN_ATTRS
-__popcnt16(unsigned short value) {
- return __builtin_popcount((int)value);
+__popcnt16(unsigned short _Value) {
+ return __builtin_popcount((int)_Value);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__popcnt(unsigned int value) {
- return __builtin_popcount(value);
+__popcnt(unsigned int _Value) {
+ return __builtin_popcount(_Value);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittest(long const *a, long b) {
- return (*a >> b) & 1;
+_bittest(long const *_BitBase, long _BitPos) {
+ return (*_BitBase >> _BitPos) & 1;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandcomplement(long *a, long b) {
- unsigned char x = (*a >> b) & 1;
- *a = *a ^ (1 << b);
- return x;
+_bittestandcomplement(long *_BitBase, long _BitPos) {
+ unsigned char _Res = (*_BitBase >> _BitPos) & 1;
+ *_BitBase = *_BitBase ^ (1 << _BitPos);
+ return _Res;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandreset(long *a, long b) {
- unsigned char x = (*a >> b) & 1;
- *a = *a & ~(1 << b);
- return x;
+_bittestandreset(long *_BitBase, long _BitPos) {
+ unsigned char _Res = (*_BitBase >> _BitPos) & 1;
+ *_BitBase = *_BitBase & ~(1 << _BitPos);
+ return _Res;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandset(long *a, long b) {
- unsigned char x = (*a >> b) & 1;
- *a = *a | (1 << b);
- return x;
+_bittestandset(long *_BitBase, long _BitPos) {
+ unsigned char _Res = (*_BitBase >> _BitPos) & 1;
+ *_BitBase = *_BitBase | (1 << _BitPos);
+ return _Res;
}
-#if defined(__i386__) || defined(__x86_64__)
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset(long volatile *__BitBase, long __BitPos) {
- unsigned char __Res;
- __asm__ ("xor %0, %0\n"
- "lock bts %2, %1\n"
- "setc %0\n"
- : "=r" (__Res), "+m"(*__BitBase)
- : "Ir"(__BitPos));
- return __Res;
+_interlockedbittestandset(long volatile *_BitBase, long _BitPos) {
+ long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_SEQ_CST);
+ return (_PrevVal >> _BitPos) & 1;
}
-#endif
#ifdef __x86_64__
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
@@ -571,40 +564,36 @@ _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
}
static __inline__
unsigned __int64 __DEFAULT_FN_ATTRS
- __popcnt64(unsigned __int64 value) {
- return __builtin_popcountll(value);
+__popcnt64(unsigned __int64 _Value) {
+ return __builtin_popcountll(_Value);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittest64(__int64 const *a, __int64 b) {
- return (*a >> b) & 1;
+_bittest64(__int64 const *_BitBase, __int64 _BitPos) {
+ return (*_BitBase >> _BitPos) & 1;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandcomplement64(__int64 *a, __int64 b) {
- unsigned char x = (*a >> b) & 1;
- *a = *a ^ (1ll << b);
- return x;
+_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) {
+ unsigned char _Res = (*_BitBase >> _BitPos) & 1;
+ *_BitBase = *_BitBase ^ (1ll << _BitPos);
+ return _Res;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandreset64(__int64 *a, __int64 b) {
- unsigned char x = (*a >> b) & 1;
- *a = *a & ~(1ll << b);
- return x;
+_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) {
+ unsigned char _Res = (*_BitBase >> _BitPos) & 1;
+ *_BitBase = *_BitBase & ~(1ll << _BitPos);
+ return _Res;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandset64(__int64 *a, __int64 b) {
- unsigned char x = (*a >> b) & 1;
- *a = *a | (1ll << b);
- return x;
+_bittestandset64(__int64 *_BitBase, __int64 _BitPos) {
+ unsigned char _Res = (*_BitBase >> _BitPos) & 1;
+ *_BitBase = *_BitBase | (1ll << _BitPos);
+ return _Res;
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) {
- unsigned char __Res;
- __asm__ ("xor %0, %0\n"
- "lock bts %2, %1\n"
- "setc %0\n"
- : "=r" (__Res), "+m"(*__BitBase)
- : "Ir"(__BitPos));
- return __Res;
+_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) {
+ long long _PrevVal =
+ __atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST);
+ return (_PrevVal >> _BitPos) & 1;
}
#endif
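
Both _interlockedbittestandset rewrites above rely on the same fact: __atomic_fetch_or returns the value the word held before the OR, so extracting the target bit from that old value yields exactly what the removed "lock bts; setc" sequence materialized, and __ATOMIC_SEQ_CST preserves the full-barrier behavior of the lock prefix. The builtin is also target-independent, which is why the x86-only guard around the 32-bit variant could be dropped. A minimal standalone sketch of that semantics (hypothetical test_and_set_bit helper; any GCC/Clang target):

#include <stdio.h>

static unsigned char test_and_set_bit(long volatile *base, long pos) {
  /* Old value of the word, from before the OR was applied. */
  long prev = __atomic_fetch_or(base, 1L << pos, __ATOMIC_SEQ_CST);
  return (prev >> pos) & 1;  /* 1 if the bit was already set */
}

int main(void) {
  long word = 0;
  printf("%d\n", test_and_set_bit(&word, 3));  /* 0: bit 3 was clear */
  printf("%d\n", test_and_set_bit(&word, 3));  /* 1: bit 3 is now set */
  return 0;
}
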
/*----------------------------------------------------------------------------*\
@@ -612,16 +601,16 @@ _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
- return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
+ return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
- return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
+ return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
- return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
+ return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
#endif
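
The _InterlockedExchangeAdd* fix addresses two problems at once: __atomic_add_fetch returns the updated value (hence the old code subtracting _Value back out), and the literal 0 memory order is __ATOMIC_RELAXED, weaker than the full barrier the MSVC Interlocked family implies. __atomic_fetch_add returns the prior value directly. A small sketch of the difference between the two builtins (any GCC/Clang target):

#include <stdio.h>

int main(void) {
  short counter = 10;
  /* fetch_add returns the value from *before* the addition... */
  short old_val = __atomic_fetch_add(&counter, 5, __ATOMIC_SEQ_CST);
  /* ...while add_fetch returns the value from *after* it. */
  short new_val = __atomic_add_fetch(&counter, 5, __ATOMIC_SEQ_CST);
  printf("%d %d %d\n", old_val, new_val, counter);  /* 10 20 20 */
  return 0;
}
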
/*----------------------------------------------------------------------------*\
@@ -629,20 +618,20 @@ _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
- return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
+ return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
- return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
+ return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
- return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
+ return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
- return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
+ return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -650,12 +639,12 @@ _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
\*----------------------------------------------------------------------------*/
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16(short volatile *_Value) {
- return __atomic_add_fetch(_Value, 1, 0);
+ return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64(__int64 volatile *_Value) {
- return __atomic_add_fetch(_Value, 1, 0);
+ return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -663,12 +652,12 @@ _InterlockedIncrement64(__int64 volatile *_Value) {
\*----------------------------------------------------------------------------*/
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16(short volatile *_Value) {
- return __atomic_sub_fetch(_Value, 1, 0);
+ return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64(__int64 volatile *_Value) {
- return __atomic_sub_fetch(_Value, 1, 0);
+ return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -676,20 +665,20 @@ _InterlockedDecrement64(__int64 volatile *_Value) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8(char volatile *_Value, char _Mask) {
- return __atomic_and_fetch(_Value, _Mask, 0);
+ return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16(short volatile *_Value, short _Mask) {
- return __atomic_and_fetch(_Value, _Mask, 0);
+ return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd(long volatile *_Value, long _Mask) {
- return __atomic_and_fetch(_Value, _Mask, 0);
+ return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
- return __atomic_and_fetch(_Value, _Mask, 0);
+ return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -697,20 +686,20 @@ _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8(char volatile *_Value, char _Mask) {
- return __atomic_or_fetch(_Value, _Mask, 0);
+ return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16(short volatile *_Value, short _Mask) {
- return __atomic_or_fetch(_Value, _Mask, 0);
+ return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr(long volatile *_Value, long _Mask) {
- return __atomic_or_fetch(_Value, _Mask, 0);
+ return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
- return __atomic_or_fetch(_Value, _Mask, 0);
+ return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -718,20 +707,20 @@ _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedXor8(char volatile *_Value, char _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, 0);
+ return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedXor16(short volatile *_Value, short _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, 0);
+ return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedXor(long volatile *_Value, long _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, 0);
+ return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, 0);
+ return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -739,18 +728,18 @@ _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8(char volatile *_Target, char _Value) {
- __atomic_exchange(_Target, &_Value, &_Value, 0);
+ __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16(short volatile *_Target, short _Value) {
- __atomic_exchange(_Target, &_Value, &_Value, 0);
+ __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
return _Value;
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
- __atomic_exchange(_Target, &_Value, &_Value, 0);
+ __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_SEQ_CST);
return _Value;
}
#endif
@@ -760,19 +749,22 @@ _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8(char volatile *_Destination,
char _Exchange, char _Comparand) {
- __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
+ __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16(short volatile *_Destination,
short _Exchange, short _Comparand) {
- __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
+ __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
- __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
+ __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
return _Comparand;
}
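
After the call, _Comparand holds whatever value was observed at _Destination, which is how the MSVC intrinsic reports the outcome: the store happened iff the returned value equals the original comparand. The added __ATOMIC_SEQ_CST arguments replace the relaxed order 0 used before. A sketch of that convention using a hypothetical cas helper of the same shape:

#include <stdio.h>

/* Same shape as _InterlockedCompareExchange: returns the value seen at
   *dst; the exchange took place iff that value equals comp. */
static long cas(long volatile *dst, long exch, long comp) {
  __atomic_compare_exchange(dst, &comp, &exch, 0,
                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return comp;  /* updated to the observed value on failure */
}

int main(void) {
  long v = 7;
  long first = cas(&v, 8, 7);   /* succeeds: returns 7, v becomes 8 */
  long second = cas(&v, 9, 7);  /* fails: returns 8, v stays 8 */
  printf("%ld %ld %ld\n", first, second, v);  /* 7 8 8 */
  return 0;
}
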
/*----------------------------------------------------------------------------*\
diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h
index 2c80e24..f52bcbc 100644
--- a/contrib/llvm/tools/clang/lib/Headers/altivec.h
+++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h
@@ -48,7 +48,8 @@ static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
vector bool char __b,
vector unsigned char __c);
-static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b,
+static vector short __ATTRS_o_ai vec_perm(vector signed short __a,
+ vector signed short __b,
vector unsigned char __c);
static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a,
@@ -62,7 +63,8 @@ static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a,
static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
vector unsigned char __c);
-static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b,
+static vector int __ATTRS_o_ai vec_perm(vector signed int __a,
+ vector signed int __b,
vector unsigned char __c);
static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a,
@@ -77,14 +79,18 @@ static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
vector unsigned char __c);
#ifdef __VSX__
-static vector long long __ATTRS_o_ai vec_perm(vector long long __a,
- vector long long __b,
+static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a,
+ vector signed long long __b,
vector unsigned char __c);
static vector unsigned long long __ATTRS_o_ai
vec_perm(vector unsigned long long __a, vector unsigned long long __b,
vector unsigned char __c);
+static vector bool long long __ATTRS_o_ai
+vec_perm(vector bool long long __a, vector bool long long __b,
+ vector unsigned char __c);
+
static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
vector unsigned char __c);
#endif
@@ -1735,6 +1741,48 @@ static vector bool long long __ATTRS_o_ai
vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {
return vec_cmpgt(__b, __a);
}
+
+/* vec_cntlz */
+
+static vector signed char __ATTRS_o_ai vec_cntlz(vector signed char __a) {
+ return __builtin_altivec_vclzb(__a);
+}
+static vector unsigned char __ATTRS_o_ai vec_cntlz(vector unsigned char __a) {
+ return __builtin_altivec_vclzb(__a);
+}
+static vector signed short __ATTRS_o_ai vec_cntlz(vector signed short __a) {
+ return __builtin_altivec_vclzh(__a);
+}
+static vector unsigned short __ATTRS_o_ai vec_cntlz(vector unsigned short __a) {
+ return __builtin_altivec_vclzh(__a);
+}
+static vector signed int __ATTRS_o_ai vec_cntlz(vector signed int __a) {
+ return __builtin_altivec_vclzw(__a);
+}
+static vector unsigned int __ATTRS_o_ai vec_cntlz(vector unsigned int __a) {
+ return __builtin_altivec_vclzw(__a);
+}
+static vector signed long long __ATTRS_o_ai
+vec_cntlz(vector signed long long __a) {
+ return __builtin_altivec_vclzd(__a);
+}
+static vector unsigned long long __ATTRS_o_ai
+vec_cntlz(vector unsigned long long __a) {
+ return __builtin_altivec_vclzd(__a);
+}
+#endif
+
+/* vec_cpsgn */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai vec_cpsgn(vector float __a, vector float __b) {
+ return __builtin_vsx_xvcpsgnsp(__a, __b);
+}
+
+static vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
+ vector double __b) {
+ return __builtin_vsx_xvcpsgndp(__a, __b);
+}
#endif
/* vec_ctf */
@@ -1790,14 +1838,58 @@ vec_vctuxs(vector float __a, int __b) {
}
/* vec_div */
+
+/* Integer vector divides (the vectors are scalarized, the elements
+   divided, and the vectors reassembled).
+*/
+static vector signed char __ATTRS_o_ai vec_div(vector signed char __a,
+ vector signed char __b) {
+ return __a / __b;
+}
+
+static vector unsigned char __ATTRS_o_ai vec_div(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __a / __b;
+}
+
+static vector signed short __ATTRS_o_ai vec_div(vector signed short __a,
+ vector signed short __b) {
+ return __a / __b;
+}
+
+static vector unsigned short __ATTRS_o_ai vec_div(vector unsigned short __a,
+ vector unsigned short __b) {
+ return __a / __b;
+}
+
+static vector signed int __ATTRS_o_ai vec_div(vector signed int __a,
+ vector signed int __b) {
+ return __a / __b;
+}
+
+static vector unsigned int __ATTRS_o_ai vec_div(vector unsigned int __a,
+ vector unsigned int __b) {
+ return __a / __b;
+}
+
#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_div(vector signed long long __a, vector signed long long __b) {
+ return __a / __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_div(vector unsigned long long __a, vector unsigned long long __b) {
+ return __a / __b;
+}
+
static vector float __ATTRS_o_ai vec_div(vector float __a, vector float __b) {
- return __builtin_vsx_xvdivsp(__a, __b);
+ return __a / __b;
}
static vector double __ATTRS_o_ai vec_div(vector double __a,
vector double __b) {
- return __builtin_vsx_xvdivdp(__a, __b);
+ return __a / __b;
}
#endif
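
Since the integer and VSX floating-point overloads now lower to the element-wise / operator, vec_div behaves like per-lane C division, truncating for integers. A usage sketch (assumes a PowerPC target built with -maltivec, plus -mvsx for the long long and float/double forms; lane access via GCC/Clang vector subscripting):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed int a = {10, 21, 30, 40};
  vector signed int b = { 2,  4,  5,  8};
  vector signed int q = vec_div(a, b);  /* per lane: {5, 5, 6, 5} */
  for (int i = 0; i < 4; ++i)
    printf("%d ", q[i]);
  printf("\n");
  return 0;
}
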
@@ -1841,6 +1933,189 @@ vec_dstt(const void *__a, int __b, int __c) {
__builtin_altivec_dstt(__a, __b, __c);
}
+/* vec_eqv */
+
+#ifdef __POWER8_VECTOR__
+static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a,
+ vector signed char __b) {
+ return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed char __ATTRS_o_ai vec_eqv(vector bool char __a,
+ vector signed char __b) {
+ return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a,
+ vector bool char __b) {
+ return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a,
+ vector unsigned char __b) {
+ return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_eqv(vector bool char __a,
+ vector unsigned char __b) {
+ return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a,
+ vector bool char __b) {
+ return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a,
+ vector signed short __b) {
+ return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed short __ATTRS_o_ai vec_eqv(vector bool short __a,
+ vector signed short __b) {
+ return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a,
+ vector bool short __b) {
+ return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a,
+ vector unsigned short __b) {
+ return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai vec_eqv(vector bool short __a,
+ vector unsigned short __b) {
+ return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a,
+ vector bool short __b) {
+ return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a,
+ vector signed int __b) {
+ return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed int __ATTRS_o_ai vec_eqv(vector bool int __a,
+ vector signed int __b) {
+ return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a,
+ vector bool int __b) {
+ return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a,
+ vector unsigned int __b) {
+ return __builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned int __ATTRS_o_ai vec_eqv(vector bool int __a,
+ vector unsigned int __b) {
+ return __builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a,
+ vector bool int __b) {
+ return __builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_eqv(vector signed long long __a, vector signed long long __b) {
+ return (vector signed long long)
+ __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_eqv(vector bool long long __a, vector signed long long __b) {
+ return (vector signed long long)
+ __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_eqv(vector signed long long __a, vector bool long long __b) {
+ return (vector signed long long)
+ __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_eqv(vector unsigned long long __a, vector unsigned long long __b) {
+ return (vector unsigned long long)
+ __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_eqv(vector bool long long __a, vector unsigned long long __b) {
+ return (vector unsigned long long)
+ __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_eqv(vector unsigned long long __a, vector bool long long __b) {
+ return (vector unsigned long long)
+ __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+}
+
+static vector float __ATTRS_o_ai vec_eqv(vector float __a, vector float __b) {
+ return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector float __ATTRS_o_ai vec_eqv(vector bool int __a,
+ vector float __b) {
+ return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector float __ATTRS_o_ai vec_eqv(vector float __a,
+ vector bool int __b) {
+ return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector double __ATTRS_o_ai vec_eqv(vector double __a,
+ vector double __b) {
+ return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector double __ATTRS_o_ai vec_eqv(vector bool long long __a,
+ vector double __b) {
+ return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+
+static vector double __ATTRS_o_ai vec_eqv(vector double __a,
+ vector bool long long __b) {
+ return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
+}
+#endif
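
The xxleqv instruction all of these overloads wrap computes bitwise equivalence: a result bit is set exactly where the corresponding bits of the inputs agree, i.e. vec_eqv(a, b) == ~(a ^ b). A scalar model of one 32-bit lane:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint32_t a = 0xF0F0CCCCu, b = 0xFF00AAAAu;
  uint32_t eqv = ~(a ^ b);              /* set where a and b agree */
  printf("0x%08X\n", (unsigned)eqv);    /* 0xF00F9999 */
  return 0;
}
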
+
/* vec_expte */
static vector float __attribute__((__always_inline__))
@@ -1857,10 +2132,19 @@ vec_vexptefp(vector float __a) {
/* vec_floor */
-static vector float __attribute__((__always_inline__))
-vec_floor(vector float __a) {
+static vector float __ATTRS_o_ai vec_floor(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvrspim(__a);
+#else
return __builtin_altivec_vrfim(__a);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_floor(vector double __a) {
+ return __builtin_vsx_xvrdpim(__a);
}
+#endif
/* vec_vrfim */
@@ -2532,10 +2816,21 @@ static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) {
/* vec_madd */
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
vec_madd(vector float __a, vector float __b, vector float __c) {
+#ifdef __VSX__
+ return __builtin_vsx_xvmaddasp(__a, __b, __c);
+#else
return __builtin_altivec_vmaddfp(__a, __b, __c);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_madd(vector double __a, vector double __b, vector double __c) {
+ return __builtin_vsx_xvmaddadp(__a, __b, __c);
}
+#endif
/* vec_vmaddfp */
@@ -2559,6 +2854,20 @@ vec_vmhaddshs(vector signed short __a, vector signed short __b,
return __builtin_altivec_vmhaddshs(__a, __b, __c);
}
+/* vec_msub */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai
+vec_msub(vector float __a, vector float __b, vector float __c) {
+ return __builtin_vsx_xvmsubasp(__a, __b, __c);
+}
+
+static vector double __ATTRS_o_ai
+vec_msub(vector double __a, vector double __b, vector double __c) {
+ return __builtin_vsx_xvmsubadp(__a, __b, __c);
+}
+#endif
+
/* vec_max */
static vector signed char __ATTRS_o_ai vec_max(vector signed char __a,
@@ -2893,6 +3202,86 @@ static vector float __ATTRS_o_ai vec_mergeh(vector float __a,
0x14, 0x15, 0x16, 0x17));
}
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_mergeh(vector signed long long __a, vector signed long long __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_mergeh(vector signed long long __a, vector bool long long __b) {
+ return vec_perm(__a, (vector signed long long)__b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_mergeh(vector bool long long __a, vector signed long long __b) {
+ return vec_perm((vector signed long long)__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_mergeh(vector unsigned long long __a, vector bool long long __b) {
+ return vec_perm(__a, (vector unsigned long long)__b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_mergeh(vector bool long long __a, vector unsigned long long __b) {
+ return vec_perm((vector unsigned long long)__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+static vector double __ATTRS_o_ai vec_mergeh(vector double __a,
+ vector double __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+static vector double __ATTRS_o_ai vec_mergeh(vector double __a,
+ vector bool long long __b) {
+ return vec_perm(__a, (vector double)__b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+static vector double __ATTRS_o_ai vec_mergeh(vector bool long long __a,
+ vector double __b) {
+ return vec_perm((vector double)__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
+}
+#endif
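
These doubleword merges lean on vec_perm's selection rule: each selector byte, taken modulo 32, indexes the 32-byte concatenation of the two inputs in big-endian byte order (0x00-0x0F pick from the first operand, 0x10-0x1F from the second), so the {0x00..0x07, 0x10..0x17} pattern extracts the high doubleword of each input. A scalar model of that rule (hypothetical perm16 helper):

#include <stdio.h>

static void perm16(const unsigned char a[16], const unsigned char b[16],
                   const unsigned char sel[16], unsigned char out[16]) {
  for (int i = 0; i < 16; ++i) {
    unsigned char s = sel[i] & 0x1F;       /* selector is taken mod 32 */
    out[i] = (s < 16) ? a[s] : b[s - 16];  /* index into a || b */
  }
}

int main(void) {
  unsigned char a[16], b[16], out[16];
  const unsigned char mergeh[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
                                    0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
                                    0x14, 0x15, 0x16, 0x17};
  for (int i = 0; i < 16; ++i) { a[i] = i; b[i] = 0x40 + i; }
  perm16(a, b, mergeh, out);
  for (int i = 0; i < 16; ++i)
    printf("%02X ", out[i]);  /* 00..07 then 40..47 */
  printf("\n");
  return 0;
}
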
+
/* vec_vmrghb */
#define __builtin_altivec_vmrghb vec_vmrghb
@@ -3081,6 +3470,81 @@ static vector float __ATTRS_o_ai vec_mergel(vector float __a,
0x1C, 0x1D, 0x1E, 0x1F));
}
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_mergel(vector signed long long __a, vector signed long long __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector signed long long __ATTRS_o_ai
+vec_mergel(vector signed long long __a, vector bool long long __b) {
+ return vec_perm(__a, (vector signed long long)__b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector signed long long __ATTRS_o_ai
+vec_mergel(vector bool long long __a, vector signed long long __b) {
+ return vec_perm((vector signed long long)__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector unsigned long long __ATTRS_o_ai
+vec_mergel(vector unsigned long long __a, vector unsigned long long __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector unsigned long long __ATTRS_o_ai
+vec_mergel(vector unsigned long long __a, vector bool long long __b) {
+ return vec_perm(__a, (vector unsigned long long)__b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector unsigned long long __ATTRS_o_ai
+vec_mergel(vector bool long long __a, vector unsigned long long __b) {
+ return vec_perm((vector unsigned long long)__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector double __ATTRS_o_ai
+vec_mergel(vector double __a, vector double __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector double __ATTRS_o_ai
+vec_mergel(vector double __a, vector bool long long __b) {
+ return vec_perm(__a, (vector double)__b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+static vector double __ATTRS_o_ai
+vec_mergel(vector bool long long __a, vector double __b) {
+ return vec_perm((vector double)__a, __b,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F));
+}
+#endif
+
/* vec_vmrglb */
#define __builtin_altivec_vmrglb vec_vmrglb
@@ -3677,6 +4141,65 @@ static void __ATTRS_o_ai vec_mtvscr(vector float __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
+/* vec_mul */
+
+/* Integer vector multiplication multiplies the odd and even elements
+   separately, then truncates each product and moves it into the
+   result vector.
+*/
+static vector signed char __ATTRS_o_ai vec_mul(vector signed char __a,
+ vector signed char __b) {
+ return __a * __b;
+}
+
+static vector unsigned char __ATTRS_o_ai vec_mul(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __a * __b;
+}
+
+static vector signed short __ATTRS_o_ai vec_mul(vector signed short __a,
+ vector signed short __b) {
+ return __a * __b;
+}
+
+static vector unsigned short __ATTRS_o_ai vec_mul(vector unsigned short __a,
+ vector unsigned short __b) {
+ return __a * __b;
+}
+
+static vector signed int __ATTRS_o_ai vec_mul(vector signed int __a,
+ vector signed int __b) {
+ return __a * __b;
+}
+
+static vector unsigned int __ATTRS_o_ai vec_mul(vector unsigned int __a,
+ vector unsigned int __b) {
+ return __a * __b;
+}
+
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_mul(vector signed long long __a, vector signed long long __b) {
+ return __a * __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_mul(vector unsigned long long __a, vector unsigned long long __b) {
+ return __a * __b;
+}
+#endif
+
+static vector float __ATTRS_o_ai vec_mul(vector float __a, vector float __b) {
+ return __a * __b;
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_mul(vector double __a, vector double __b) {
+ return __a * __b;
+}
+#endif
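
With the overloads lowered to the element-wise * operator, each lane keeps the low bits of its product, which is the truncation the comment above describes. A usage sketch (PowerPC with -maltivec; -mvsx for the 64-bit and double forms):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector unsigned int a = {1, 2, 3, 100000};
  vector unsigned int b = {10, 10, 10, 100000};
  vector unsigned int p = vec_mul(a, b);
  for (int i = 0; i < 4; ++i)
    printf("%u ", p[i]);  /* 10 20 30 1410065408 (10^10 mod 2^32) */
  printf("\n");
  return 0;
}
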
+
/* The vmulos* and vmules* instructions have a big endian bias, so
we must reverse the meaning of "even" and "odd" for little endian. */
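
Concretely, the vmules*/vmulos* instructions produce full-width (double element size) products of alternating elements, with elements numbered big-endian; swapping which builtin backs each intrinsic on little-endian targets is meant to keep the language-level contract stable, so that vec_mule multiplies elements 0, 2, 4, 6 and vec_mulo elements 1, 3, 5, 7 of the program's element order on either endianness. A sketch of that contract (PowerPC, -maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed short a = {1, 2, 3, 4, 5, 6, 7, 8};
  vector signed short b = {10, 10, 10, 10, 10, 10, 10, 10};
  vector signed int e = vec_mule(a, b);  /* elements 0,2,4,6: {10,30,50,70} */
  vector signed int o = vec_mulo(a, b);  /* elements 1,3,5,7: {20,40,60,80} */
  for (int i = 0; i < 4; ++i)
    printf("%d ", e[i]);
  printf("\n");
  for (int i = 0; i < 4; ++i)
    printf("%d ", o[i]);
  printf("\n");
  return 0;
}
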
@@ -3882,12 +4405,165 @@ vec_vmulouh(vector unsigned short __a, vector unsigned short __b) {
#endif
}
+/* vec_nand */
+
+#ifdef __POWER8_VECTOR__
+static vector signed char __ATTRS_o_ai vec_nand(vector signed char __a,
+ vector signed char __b) {
+ return ~(__a & __b);
+}
+
+static vector signed char __ATTRS_o_ai vec_nand(vector signed char __a,
+ vector bool char __b) {
+ return ~(__a & __b);
+}
+
+static vector signed char __ATTRS_o_ai vec_nand(vector bool char __a,
+ vector signed char __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a,
+ vector unsigned char __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a,
+ vector bool char __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_nand(vector bool char __a,
+ vector unsigned char __b) {
+ return ~(__a & __b);
+}
+
+static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a,
+ vector signed short __b) {
+ return ~(__a & __b);
+}
+
+static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a,
+ vector bool short __b) {
+ return ~(__a & __b);
+}
+
+static vector signed short __ATTRS_o_ai vec_nand(vector bool short __a,
+ vector signed short __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a,
+ vector unsigned short __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a,
+ vector bool short __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned short __ATTRS_o_ai vec_nand(vector bool short __a,
+ vector unsigned short __b) {
+ return ~(__a & __b);
+}
+
+static vector signed int __ATTRS_o_ai vec_nand(vector signed int __a,
+ vector signed int __b) {
+ return ~(__a & __b);
+}
+
+static vector signed int __ATTRS_o_ai vec_nand(vector signed int __a,
+ vector bool int __b) {
+ return ~(__a & __b);
+}
+
+static vector signed int __ATTRS_o_ai vec_nand(vector bool int __a,
+ vector signed int __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a,
+ vector unsigned int __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a,
+ vector bool int __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned int __ATTRS_o_ai vec_nand(vector bool int __a,
+ vector unsigned int __b) {
+ return ~(__a & __b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_nand(vector signed long long __a, vector signed long long __b) {
+ return ~(__a & __b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_nand(vector signed long long __a, vector bool long long __b) {
+ return ~(__a & __b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_nand(vector bool long long __a, vector signed long long __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_nand(vector unsigned long long __a, vector unsigned long long __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_nand(vector unsigned long long __a, vector bool long long __b) {
+ return ~(__a & __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_nand(vector bool long long __a, vector unsigned long long __b) {
+ return ~(__a & __b);
+}
+#endif
+
+/* vec_nmadd */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai
+vec_nmadd(vector float __a, vector float __b, vector float __c) {
+ return __builtin_vsx_xvnmaddasp(__a, __b, __c);
+}
+
+static vector double __ATTRS_o_ai
+vec_nmadd(vector double __a, vector double __b, vector double __c) {
+ return __builtin_vsx_xvnmaddadp(__a, __b, __c);
+}
+#endif
+
/* vec_nmsub */
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
vec_nmsub(vector float __a, vector float __b, vector float __c) {
+#ifdef __VSX__
+ return __builtin_vsx_xvnmsubasp(__a, __b, __c);
+#else
return __builtin_altivec_vnmsubfp(__a, __b, __c);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_nmsub(vector double __a, vector double __b, vector double __c) {
+ return __builtin_vsx_xvnmsubadp(__a, __b, __c);
}
+#endif
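
The negated fused forms follow the VSX xvnmadd/xvnmsub semantics: vec_nmadd(a, b, c) computes -(a*b + c) and vec_nmsub(a, b, c) computes -(a*b - c), each as a single fused operation. A scalar model using C's fma (link with -lm; fusion aside, the signs are the point here):

#include <math.h>
#include <stdio.h>

int main(void) {
  double a = 2.0, b = 3.0, c = 1.0;
  double nmadd = -fma(a, b, c);    /* -(a*b + c) = -7 */
  double nmsub = -fma(a, b, -c);   /* -(a*b - c) = -5 */
  printf("%g %g\n", nmadd, nmsub);
  return 0;
}
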
/* vec_vnmsubfp */
@@ -3949,6 +4625,15 @@ static vector float __ATTRS_o_ai vec_nor(vector float __a, vector float __b) {
return (vector float)__res;
}
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_nor(vector double __a, vector double __b) {
+ vector unsigned long long __res =
+ ~((vector unsigned long long)__a | (vector unsigned long long)__b);
+ return (vector double)__res;
+}
+#endif
+
/* vec_vnor */
static vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a,
@@ -4141,6 +4826,22 @@ static vector float __ATTRS_o_ai vec_or(vector float __a, vector bool int __b) {
}
#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_or(vector bool long long __a,
+ vector double __b) {
+ return (vector unsigned long long)__a | (vector unsigned long long)__b;
+}
+
+static vector double __ATTRS_o_ai vec_or(vector double __a,
+ vector bool long long __b) {
+ return (vector unsigned long long)__a | (vector unsigned long long)__b;
+}
+
+static vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) {
+ vector unsigned long long __res =
+ (vector unsigned long long)__a | (vector unsigned long long)__b;
+ return (vector double)__res;
+}
+
static vector signed long long __ATTRS_o_ai
vec_or(vector signed long long __a, vector signed long long __b) {
return __a | __b;
@@ -4177,6 +4878,128 @@ static vector bool long long __ATTRS_o_ai vec_or(vector bool long long __a,
}
#endif
+#ifdef __POWER8_VECTOR__
+static vector signed char __ATTRS_o_ai vec_orc(vector signed char __a,
+ vector signed char __b) {
+ return __a | ~__b;
+}
+
+static vector signed char __ATTRS_o_ai vec_orc(vector signed char __a,
+ vector bool char __b) {
+ return __a | ~__b;
+}
+
+static vector signed char __ATTRS_o_ai vec_orc(vector bool char __a,
+ vector signed char __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a,
+ vector bool char __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned char __ATTRS_o_ai vec_orc(vector bool char __a,
+ vector unsigned char __b) {
+ return __a | ~__b;
+}
+
+static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a,
+ vector signed short __b) {
+ return __a | ~__b;
+}
+
+static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a,
+ vector bool short __b) {
+ return __a | ~__b;
+}
+
+static vector signed short __ATTRS_o_ai vec_orc(vector bool short __a,
+ vector signed short __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a,
+ vector unsigned short __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a,
+ vector bool short __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_orc(vector bool short __a, vector unsigned short __b) {
+ return __a | ~__b;
+}
+
+static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a,
+ vector signed int __b) {
+ return __a | ~__b;
+}
+
+static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a,
+ vector bool int __b) {
+ return __a | ~__b;
+}
+
+static vector signed int __ATTRS_o_ai vec_orc(vector bool int __a,
+ vector signed int __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a,
+ vector unsigned int __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a,
+ vector bool int __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned int __ATTRS_o_ai vec_orc(vector bool int __a,
+ vector unsigned int __b) {
+ return __a | ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_orc(vector signed long long __a, vector signed long long __b) {
+ return __a | ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai vec_orc(vector signed long long __a,
+ vector bool long long __b) {
+ return __a | ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_orc(vector bool long long __a, vector signed long long __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_orc(vector unsigned long long __a, vector unsigned long long __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_orc(vector unsigned long long __a, vector bool long long __b) {
+ return __a | ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_orc(vector bool long long __a, vector unsigned long long __b) {
+ return __a | ~__b;
+}
+#endif
+
/* vec_vor */
static vector signed char __ATTRS_o_ai vec_vor(vector signed char __a,
@@ -4431,6 +5254,53 @@ static vector bool short __ATTRS_o_ai vec_pack(vector bool int __a,
#endif
}
+#ifdef __VSX__
+static vector signed int __ATTRS_o_ai vec_pack(vector signed long long __a,
+ vector signed long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return (vector signed int)vec_perm(
+ __a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B));
+#else
+ return (vector signed int)vec_perm(
+ __a, __b,
+ (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
+#endif
+}
+static vector unsigned int __ATTRS_o_ai
+vec_pack(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return (vector unsigned int)vec_perm(
+ __a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B));
+#else
+ return (vector unsigned int)vec_perm(
+ __a, __b,
+ (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
+#endif
+}
+
+static vector bool int __ATTRS_o_ai vec_pack(vector bool long long __a,
+ vector bool long long __b) {
+#ifdef __LITTLE_ENDIAN__
+ return (vector bool int)vec_perm(
+ __a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B));
+#else
+ return (vector bool int)vec_perm(
+ __a, __b,
+ (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
+#endif
+}
+
+#endif
+
/* vec_vpkuhum */
#define __builtin_altivec_vpkuhum vec_vpkuhum
@@ -4895,17 +5765,18 @@ static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
#endif
}
-static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b,
+static vector short __ATTRS_o_ai vec_perm(vector signed short __a,
+ vector signed short __b,
vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
__d = vec_xor(__c, __d);
- return (vector short)__builtin_altivec_vperm_4si((vector int)__b,
- (vector int)__a, __d);
+ return (vector signed short)__builtin_altivec_vperm_4si((vector int)__b,
+ (vector int)__a, __d);
#else
- return (vector short)__builtin_altivec_vperm_4si((vector int)__a,
- (vector int)__b, __c);
+ return (vector signed short)__builtin_altivec_vperm_4si((vector int)__a,
+ (vector int)__b, __c);
#endif
}
@@ -4953,15 +5824,16 @@ static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
#endif
}
-static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b,
+static vector int __ATTRS_o_ai vec_perm(vector signed int __a,
+ vector signed int __b,
vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
__d = vec_xor(__c, __d);
- return (vector int)__builtin_altivec_vperm_4si(__b, __a, __d);
+ return (vector signed int)__builtin_altivec_vperm_4si(__b, __a, __d);
#else
- return (vector int)__builtin_altivec_vperm_4si(__a, __b, __c);
+ return (vector signed int)__builtin_altivec_vperm_4si(__a, __b, __c);
#endif
}
@@ -5010,16 +5882,18 @@ static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
}
#ifdef __VSX__
-static vector long long __ATTRS_o_ai vec_perm(vector long long __a,
- vector long long __b,
+static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a,
+ vector signed long long __b,
vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
__d = vec_xor(__c, __d);
- return (vector long long)__builtin_altivec_vperm_4si(__b, __a, __d);
+ return (vector signed long long)__builtin_altivec_vperm_4si(
+ (vector int)__b, (vector int)__a, __d);
#else
- return (vector long long)__builtin_altivec_vperm_4si(__a, __b, __c);
+ return (vector signed long long)__builtin_altivec_vperm_4si(
+ (vector int)__a, (vector int)__b, __c);
#endif
}
@@ -5038,6 +5912,21 @@ vec_perm(vector unsigned long long __a, vector unsigned long long __b,
#endif
}
+static vector bool long long __ATTRS_o_ai
+vec_perm(vector bool long long __a, vector bool long long __b,
+ vector unsigned char __c) {
+#ifdef __LITTLE_ENDIAN__
+ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255};
+ __d = vec_xor(__c, __d);
+ return (vector bool long long)__builtin_altivec_vperm_4si(
+ (vector int)__b, (vector int)__a, __d);
+#else
+ return (vector bool long long)__builtin_altivec_vperm_4si(
+ (vector int)__a, (vector int)__b, __c);
+#endif
+}
+
static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
@@ -5139,10 +6028,20 @@ static vector double __ATTRS_o_ai vec_vperm(vector double __a,
/* vec_re */
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
vec_re(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvresp(__a);
+#else
return __builtin_altivec_vrefp(__a);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_re(vector double __a) {
+ return __builtin_vsx_xvredp(__a);
}
+#endif
/* vec_vrefp */
@@ -5232,10 +6131,41 @@ static vector unsigned int __ATTRS_o_ai vec_vrlw(vector unsigned int __a,
/* vec_round */
-static vector float __attribute__((__always_inline__))
-vec_round(vector float __a) {
+static vector float __ATTRS_o_ai vec_round(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvrspi(__a);
+#else
return __builtin_altivec_vrfin(__a);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_round(vector double __a) {
+ return __builtin_vsx_xvrdpi(__a);
+}
+
+/* vec_rint */
+
+static vector float __ATTRS_o_ai
+vec_rint(vector float __a) {
+ return __builtin_vsx_xvrspic(__a);
+}
+
+static vector double __ATTRS_o_ai
+vec_rint(vector double __a) {
+ return __builtin_vsx_xvrdpic(__a);
+}
+
+/* vec_nearbyint */
+
+static vector float __ATTRS_o_ai vec_nearbyint(vector float __a) {
+ return __builtin_vsx_xvrspi(__a);
+}
+
+static vector double __ATTRS_o_ai vec_nearbyint(vector double __a) {
+ return __builtin_vsx_xvrdpi(__a);
}
+#endif
/* vec_vrfin */
@@ -5244,12 +6174,34 @@ vec_vrfin(vector float __a) {
return __builtin_altivec_vrfin(__a);
}
+/* vec_sqrt */
+
+#ifdef __VSX__
+static vector float __ATTRS_o_ai vec_sqrt(vector float __a) {
+ return __builtin_vsx_xvsqrtsp(__a);
+}
+
+static vector double __ATTRS_o_ai vec_sqrt(vector double __a) {
+ return __builtin_vsx_xvsqrtdp(__a);
+}
+#endif
+
/* vec_rsqrte */
-static __vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
vec_rsqrte(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvrsqrtesp(__a);
+#else
return __builtin_altivec_vrsqrtefp(__a);
+#endif
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_rsqrte(vector double __a) {
+ return __builtin_vsx_xvrsqrtedp(__a);
}
+#endif
/* vec_vrsqrtefp */
@@ -5381,6 +6333,22 @@ static vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b,
return (vector float)__res;
}
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b,
+ vector bool long long __c) {
+ vector long long __res = ((vector long long)__a & ~(vector long long)__c) |
+ ((vector long long)__b & (vector long long)__c);
+ return (vector double)__res;
+}
+
+static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b,
+ vector unsigned long long __c) {
+ vector long long __res = ((vector long long)__a & ~(vector long long)__c) |
+ ((vector long long)__b & (vector long long)__c);
+ return (vector double)__res;
+}
+#endif
+
/* vec_vsel */
static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a,
@@ -5593,78 +6561,121 @@ static vector unsigned int __ATTRS_o_ai vec_vslw(vector unsigned int __a,
static vector signed char __ATTRS_o_ai vec_sld(vector signed char __a,
vector signed char __b,
- unsigned char __c) {
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a,
vector unsigned char __b,
- unsigned char __c) {
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
-static vector short __ATTRS_o_ai vec_sld(vector short __a, vector short __b,
- unsigned char __c) {
+static vector bool char __ATTRS_o_ai vec_sld(vector bool char __a,
+ vector bool char __b,
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
+}
+
+static vector signed short __ATTRS_o_ai vec_sld(vector signed short __a,
+ vector signed short __b,
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
+ return vec_perm(
+ __a, __b,
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a,
vector unsigned short __b,
- unsigned char __c) {
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
+}
+
+static vector bool short __ATTRS_o_ai vec_sld(vector bool short __a,
+ vector bool short __b,
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
+ return vec_perm(
+ __a, __b,
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b,
- unsigned char __c) {
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
-static vector int __ATTRS_o_ai vec_sld(vector int __a, vector int __b,
- unsigned char __c) {
+static vector signed int __ATTRS_o_ai vec_sld(vector signed int __a,
+ vector signed int __b,
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int __a,
vector unsigned int __b,
- unsigned char __c) {
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
+}
+
+static vector bool int __ATTRS_o_ai vec_sld(vector bool int __a,
+ vector bool int __b,
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
+ return vec_perm(
+ __a, __b,
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
static vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b,
- unsigned char __c) {
+ unsigned const int __c) {
+ unsigned char __d = __c & 0x0F;
return vec_perm(
__a, __b,
- (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
- __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
- __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
+ (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5,
+ __d + 6, __d + 7, __d + 8, __d + 9, __d + 10,
+ __d + 11, __d + 12, __d + 13, __d + 14, __d + 15));
}
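
A minimal usage sketch of the rewritten vec_sld overloads (illustrative, not part of the diff; assumes clang with -maltivec, and the helper name sld_demo is hypothetical). vec_sld concatenates its operands and extracts 16 bytes starting at the given byte offset, which the new `& 0x0F` mask wraps modulo 16, so an out-of-range count such as 20 now behaves like 4:

#include <altivec.h>

/* Result is bytes a[4..15] followed by b[0..3]. */
static vector unsigned char sld_demo(vector unsigned char a,
                                     vector unsigned char b) {
  return vec_sld(a, b, 4);
}
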
/* vec_vsldoi */
@@ -6273,91 +7284,131 @@ static vector float __ATTRS_o_ai vec_vslo(vector float __a,
/* vec_splat */
static vector signed char __ATTRS_o_ai vec_splat(vector signed char __a,
- unsigned char __b) {
- return vec_perm(__a, __a, (vector unsigned char)(__b));
+ unsigned const int __b) {
+ return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F));
}
static vector unsigned char __ATTRS_o_ai vec_splat(vector unsigned char __a,
- unsigned char __b) {
- return vec_perm(__a, __a, (vector unsigned char)(__b));
+ unsigned const int __b) {
+ return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F));
}
static vector bool char __ATTRS_o_ai vec_splat(vector bool char __a,
- unsigned char __b) {
- return vec_perm(__a, __a, (vector unsigned char)(__b));
+ unsigned const int __b) {
+ return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F));
}
-static vector short __ATTRS_o_ai vec_splat(vector short __a,
- unsigned char __b) {
- __b *= 2;
- unsigned char b1 = __b + 1;
+static vector signed short __ATTRS_o_ai vec_splat(vector signed short __a,
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x07) * 2;
+ unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
- __b, b1, __b, b1, __b, b1, __b, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1, b0, b1));
}
static vector unsigned short __ATTRS_o_ai vec_splat(vector unsigned short __a,
- unsigned char __b) {
- __b *= 2;
- unsigned char b1 = __b + 1;
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x07) * 2;
+ unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
- __b, b1, __b, b1, __b, b1, __b, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1, b0, b1));
}
static vector bool short __ATTRS_o_ai vec_splat(vector bool short __a,
- unsigned char __b) {
- __b *= 2;
- unsigned char b1 = __b + 1;
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x07) * 2;
+ unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
- __b, b1, __b, b1, __b, b1, __b, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1, b0, b1));
}
static vector pixel __ATTRS_o_ai vec_splat(vector pixel __a,
- unsigned char __b) {
- __b *= 2;
- unsigned char b1 = __b + 1;
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x07) * 2;
+ unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
- __b, b1, __b, b1, __b, b1, __b, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1, b0, b1));
}
-static vector int __ATTRS_o_ai vec_splat(vector int __a, unsigned char __b) {
- __b *= 4;
- unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+static vector signed int __ATTRS_o_ai vec_splat(vector signed int __a,
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x03) * 4;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
- b1, b2, b3, __b, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
+ b1, b2, b3, b0, b1, b2, b3));
}
static vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int __a,
- unsigned char __b) {
- __b *= 4;
- unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x03) * 4;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
- b1, b2, b3, __b, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
+ b1, b2, b3, b0, b1, b2, b3));
}
static vector bool int __ATTRS_o_ai vec_splat(vector bool int __a,
- unsigned char __b) {
- __b *= 4;
- unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x03) * 4;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
- b1, b2, b3, __b, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
+ b1, b2, b3, b0, b1, b2, b3));
}
static vector float __ATTRS_o_ai vec_splat(vector float __a,
- unsigned char __b) {
- __b *= 4;
- unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x03) * 4;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
- b1, b2, b3, __b, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
+ b1, b2, b3, b0, b1, b2, b3));
}
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_splat(vector double __a,
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x01) * 8;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
+ b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ return vec_perm(__a, __a,
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
+ b0, b1, b2, b3, b4, b5, b6, b7));
+}
+static vector bool long long __ATTRS_o_ai vec_splat(vector bool long long __a,
+ unsigned const int __b) {
+ unsigned char b0 = (__b & 0x01) * 8;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
+ b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ return vec_perm(__a, __a,
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
+ b0, b1, b2, b3, b4, b5, b6, b7));
+}
+static vector signed long long __ATTRS_o_ai
+vec_splat(vector signed long long __a, unsigned const int __b) {
+ unsigned char b0 = (__b & 0x01) * 8;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
+ b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ return vec_perm(__a, __a,
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
+ b0, b1, b2, b3, b4, b5, b6, b7));
+}
+static vector unsigned long long __ATTRS_o_ai
+vec_splat(vector unsigned long long __a, unsigned const int __b) {
+ unsigned char b0 = (__b & 0x01) * 8;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
+ b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ return vec_perm(__a, __a,
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
+ b0, b1, b2, b3, b4, b5, b6, b7));
+}
+#endif
+
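
A minimal sketch of the reworked vec_splat (illustrative; assumes clang with -maltivec, splat_demo is a hypothetical helper). Each overload now masks the element index by its element count, so for four-element word vectors an index of 6 acts like 2:

#include <altivec.h>

/* All four lanes take the value of element 2. */
static vector signed int splat_demo(vector signed int v) {
  return vec_splat(v, 2);
}
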
/* vec_vspltb */
#define __builtin_altivec_vspltb vec_vspltb
@@ -6529,7 +7580,8 @@ static vector unsigned int __ATTRS_o_ai vec_splat_u32(signed char __a) {
static vector signed char __ATTRS_o_ai vec_sr(vector signed char __a,
vector unsigned char __b) {
- return __a >> (vector signed char)__b;
+ vector unsigned char __res = (vector unsigned char)__a >> __b;
+ return (vector signed char)__res;
}
static vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a,
@@ -6537,9 +7589,10 @@ static vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a,
return __a >> __b;
}
-static vector short __ATTRS_o_ai vec_sr(vector short __a,
+static vector signed short __ATTRS_o_ai vec_sr(vector signed short __a,
vector unsigned short __b) {
- return __a >> (vector short)__b;
+ vector unsigned short __res = (vector unsigned short)__a >> __b;
+ return (vector signed short)__res;
}
static vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a,
@@ -6547,8 +7600,10 @@ static vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a,
return __a >> __b;
}
-static vector int __ATTRS_o_ai vec_sr(vector int __a, vector unsigned int __b) {
- return __a >> (vector int)__b;
+static vector signed int __ATTRS_o_ai vec_sr(vector signed int __a,
+ vector unsigned int __b) {
+ vector unsigned int __res = (vector unsigned int)__a >> __b;
+ return (vector signed int)__res;
}
static vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a,
@@ -6559,7 +7614,8 @@ static vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a,
#ifdef __POWER8_VECTOR__
static vector signed long long __ATTRS_o_ai
vec_sr(vector signed long long __a, vector unsigned long long __b) {
- return __a >> (vector long long)__b;
+ vector unsigned long long __res = (vector unsigned long long)__a >> __b;
+ return (vector signed long long)__res;
}
static vector unsigned long long __ATTRS_o_ai
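
The rewritten signed overloads of vec_sr cast through the unsigned element type before shifting, so vec_sr now performs a logical shift right even for signed elements, matching the vsr* instructions rather than C's implementation-defined arithmetic shift of negative values. A minimal sketch (illustrative; assumes clang with -maltivec, sr_demo is hypothetical):

#include <altivec.h>

/* -16 is 0xFFFFFFF0; a logical shift by 2 yields 0x3FFFFFFC, not -4. */
static vector signed int sr_demo(void) {
  vector signed int a = vec_splats(-16);
  vector unsigned int sh = vec_splats(2u);
  return vec_sr(a, sh);
}
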
@@ -7960,6 +9016,13 @@ static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) {
return __a - __b;
}
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_sub(vector double __a, vector double __b) {
+ return __a - __b;
+}
+#endif
+
/* vec_vsububm */
#define __builtin_altivec_vsububm vec_vsububm
@@ -8451,11 +9514,21 @@ vec_vsumsws(vector signed int __a, vector signed int __b) {
/* vec_trunc */
-static vector float __attribute__((__always_inline__))
+static vector float __ATTRS_o_ai
vec_trunc(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvrspiz(__a);
+#else
return __builtin_altivec_vrfiz(__a);
+#endif
}
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_trunc(vector double __a) {
+ return __builtin_vsx_xvrdpiz(__a);
+}
+#endif
+
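
Under __VSX__ both vec_sub and vec_trunc now accept vector double, with vec_trunc lowering to xvrdpiz (round toward zero) rather than the AltiVec-only vrfiz. A minimal sketch (illustrative; assumes clang with -mvsx, vsx_demo is hypothetical):

#include <altivec.h>

/* Each lane computes trunc(a - b), rounding the difference toward zero. */
static vector double vsx_demo(vector double a, vector double b) {
  return vec_trunc(vec_sub(a, b));
}
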
/* vec_vrfiz */
static vector float __attribute__((__always_inline__))
@@ -8945,6 +10018,24 @@ static vector bool long long __ATTRS_o_ai vec_xor(vector bool long long __a,
vector bool long long __b) {
return __a ^ __b;
}
+
+static vector double __ATTRS_o_ai
+vec_xor(vector double __a, vector double __b) {
+ return (vector double)((vector unsigned long long)__a ^
+ (vector unsigned long long)__b);
+}
+
+static vector double __ATTRS_o_ai
+vec_xor(vector double __a, vector bool long long __b) {
+ return (vector double)((vector unsigned long long)__a ^
+                          (vector unsigned long long)__b);
+}
+
+static vector double __ATTRS_o_ai
+vec_xor(vector bool long long __a, vector double __b) {
+ return (vector double)((vector unsigned long long)__a ^
+ (vector unsigned long long)__b);
+}
#endif
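
The new vector double overloads of vec_xor pun through vector unsigned long long because ^ is not defined on floating-point vectors. One common use is flipping the sign bit to negate every lane without a floating-point operation. A minimal sketch (illustrative; assumes clang with -mpower8-vector so these overloads are visible, negate_demo is hypothetical):

#include <altivec.h>

static vector double negate_demo(vector double a) {
  vector unsigned long long m = vec_splats(0x8000000000000000ULL);
  return vec_xor(a, (vector double)m);
}
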
/* vec_vxor */
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
index 59849e4..eb198a5 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
@@ -777,6 +777,1051 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
_mm_setzero_si128 (),
(__mmask8) __U);
}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
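
A minimal sketch of the new masked blends (illustrative; assumes clang with -mavx512bw -mavx512vl and <immintrin.h>, blend_demo is hypothetical). Each mask bit selects the corresponding lane from __W when set and from __A when clear:

#include <immintrin.h>

/* Low 8 bytes come from w, high 8 bytes from a. */
static __m128i blend_demo(__m128i a, __m128i w) {
  return _mm_mask_blend_epi8(0x00FF, a, w);
}
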
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
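
The masked abs intrinsics follow the usual AVX-512 conventions: the _mask_ form writes |__A| to lanes whose mask bit is set and keeps __W elsewhere, while the _maskz_ form zeroes unselected lanes. A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, abs_demo is hypothetical):

#include <immintrin.h>

/* |a| in halfword lanes 0-3, zero in lanes 4-7. */
static __m128i abs_demo(__m128i a) {
  return _mm_maskz_abs_epi16(0x0F, a);
}
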
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi) _mm_setzero_si128 (), __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_packs_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) __W, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) __W,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) __W,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_packus_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi) __W, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) __W,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) __W,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) __W,
+ __M);
+}
+
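
packs narrows with signed saturation and packus with unsigned saturation; in both cases the mask applies to the narrowed result, whose lane count doubles relative to the sources. A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, pack_demo is hypothetical):

#include <immintrin.h>

/* The four ints of a, then the four of b, become eight saturated shorts;
   0xFF keeps every result lane. */
static __m128i pack_demo(__m128i a, __m128i b) {
  return _mm_maskz_packs_epi32(0xFF, a, b);
}
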
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
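
The adds intrinsics saturate instead of wrapping, and the masked forms blend unselected lanes from __W. A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, adds_demo is hypothetical):

#include <immintrin.h>

/* High 8 byte lanes get min(a + b, 255); low 8 lanes are taken from w. */
static __m128i adds_demo(__m128i w, __m128i a, __m128i b) {
  return _mm_mask_adds_epu8(w, 0xFF00, a, b);
}
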
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
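
pavg computes the rounding average (a + b + 1) >> 1 per unsigned lane. A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, avg_demo is hypothetical):

#include <immintrin.h>

/* Rounding average of each 16-bit lane; 0xFF selects all eight lanes. */
static __m128i avg_demo(__m128i a, __m128i b) {
  return _mm_maskz_avg_epu16(0xFF, a, b);
}
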
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
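
With a full mask the masked min/max forms behave like their unmasked counterparts, which makes a per-lane clamp a natural composition. A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, clamp_demo is hypothetical):

#include <immintrin.h>

/* clamp(v) = min(max(v, lo), hi) for each signed 16-bit lane. */
static __m128i clamp_demo(__m128i v, __m128i lo, __m128i hi) {
  return _mm_maskz_min_epi16(0xFF, _mm_maskz_max_epi16(0xFF, v, lo), hi);
}
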
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
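
The masked shuffles keep pshufb's semantics: each byte of __B selects a byte of __A, and an index byte with its high bit set yields zero; lanes whose mask bit is clear come from __W (or are zeroed in the _maskz_ form). A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, shuf_demo is hypothetical):

#include <immintrin.h>

static __m128i shuf_demo(__m128i w, __m128i a, __m128i idx) {
  return _mm_mask_shuffle_epi8(w, 0xFFFF, a, idx);
}
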
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
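
Unsigned saturating subtraction clamps at zero, giving a cheap per-byte "difference or zero". A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, subs_demo is hypothetical):

#include <immintrin.h>

/* Each byte lane computes max(a - b, 0). */
static __m128i subs_demo(__m128i a, __m128i b) {
  return _mm_maskz_subs_epu8(0xFFFF, a, b);
}
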
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
+ (__v8hi) __I /* idx */ ,
+ (__v8hi) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
+ __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
+ (__v16hi) __I /* idx */ ,
+ (__v16hi) __B,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */,
+ (__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */,
+ (__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I/* idx */,
+ (__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */,
+ (__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */,
+ (__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I/* idx */,
+ (__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) __U);
+}
+
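
vpermt2w selects 16-bit lanes from the concatenation of __A and __B using the indices in __I (0-7 pick from __A, 8-15 from __B in the 128-bit form), while the mask2 variant preserves the index operand in unselected lanes. A minimal sketch (illustrative; assumes -mavx512bw -mavx512vl, interleave_demo is hypothetical):

#include <immintrin.h>

/* Interleaves the low four 16-bit lanes of a and b. */
static __m128i interleave_demo(__m128i a, __m128i b) {
  const __m128i idx = _mm_set_epi16(11, 3, 10, 2, 9, 1, 8, 0);
  return _mm_permutex2var_epi16(a, idx, b);
}
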
#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
(__v16qi)(__m128i)(b), \