summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers/emmintrin.h')
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/emmintrin.h365
1 files changed, 251 insertions, 114 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
index 1512f9f..709815c 100644
--- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
@@ -302,7 +302,7 @@ _mm_min_pd(__m128d __a, __m128d __b)
return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
}
-/// \brief Compares lower 64-bits double-precision values of both operands, and
+/// \brief Compares lower 64-bit double-precision values of both operands, and
/// returns the greater of the pair of values in the lower 64-bits of the
/// result. The upper 64 bits of the result are copied from the upper double-
/// precision value of the first operand.
@@ -462,8 +462,9 @@ _mm_cmplt_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are less than or equal to those in the second operand. Each
-/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are less than or equal to those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -482,8 +483,9 @@ _mm_cmple_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are greater than those in the second operand. Each comparison
-/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are greater than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -502,8 +504,9 @@ _mm_cmpgt_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are greater than or equal to those in the second operand. Each
-/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are greater than or equal to those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -522,9 +525,10 @@ _mm_cmpge_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are ordered with respect to those in the second operand. A pair
-/// of double-precision values are "ordered" with respect to each other if
-/// neither value is a NaN. Each comparison yields 0h for false,
+/// operand are ordered with respect to those in the second operand.
+///
+/// A pair of double-precision values are "ordered" with respect to each
+/// other if neither value is a NaN. Each comparison yields 0h for false,
/// FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
@@ -544,9 +548,10 @@ _mm_cmpord_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are unordered with respect to those in the second operand. A pair
-/// of double-precision values are "unordered" with respect to each other if
-/// one or both values are NaN. Each comparison yields 0h for false,
+/// operand are unordered with respect to those in the second operand.
+///
+/// A pair of double-precision values are "unordered" with respect to each
+/// other if one or both values are NaN. Each comparison yields 0h for false,
/// FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
@@ -567,8 +572,9 @@ _mm_cmpunord_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are unequal to those in the second operand. Each comparison
-/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are unequal to those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -587,8 +593,9 @@ _mm_cmpneq_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are not less than those in the second operand. Each comparison
-/// yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are not less than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -607,8 +614,9 @@ _mm_cmpnlt_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are not less than or equal to those in the second operand. Each
-/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are not less than or equal to those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -627,8 +635,9 @@ _mm_cmpnle_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
-/// operand are not greater than those in the second operand. Each
-/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// operand are not greater than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -648,6 +657,7 @@ _mm_cmpngt_pd(__m128d __a, __m128d __b)
/// \brief Compares each of the corresponding double-precision values of the
/// 128-bit vectors of [2 x double] to determine if the values in the first
/// operand are not greater than or equal to those in the second operand.
+///
/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
@@ -666,8 +676,9 @@ _mm_cmpnge_pd(__m128d __a, __m128d __b)
}
/// \brief Compares the lower double-precision floating-point values in each of
-/// the two 128-bit floating-point vectors of [2 x double] for equality. The
-/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+/// the two 128-bit floating-point vectors of [2 x double] for equality.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -690,8 +701,9 @@ _mm_cmpeq_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than the corresponding value in
-/// the second parameter. The comparison yields 0h for false,
-/// FFFFFFFFFFFFFFFFh for true.
+/// the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -714,8 +726,9 @@ _mm_cmplt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0h for
-/// false, FFFFFFFFFFFFFFFFh for true.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -738,8 +751,9 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than the corresponding value
-/// in the second parameter. The comparison yields 0h for false,
-/// FFFFFFFFFFFFFFFFh for true.
+/// in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -763,8 +777,9 @@ _mm_cmpgt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0h for
-/// false, FFFFFFFFFFFFFFFFh for true.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -788,9 +803,11 @@ _mm_cmpge_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is "ordered" with respect to the
-/// corresponding value in the second parameter. The comparison yields 0h for
-/// false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are
-/// "ordered" with respect to each other if neither value is a NaN.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
+/// double-precision values are "ordered" with respect to each other if
+/// neither value is a NaN.
///
/// \headerfile <x86intrin.h>
///
@@ -813,9 +830,11 @@ _mm_cmpord_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is "unordered" with respect to the
-/// corresponding value in the second parameter. The comparison yields 0h
-/// for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
-/// are "unordered" with respect to each other if one or both values are NaN.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
+/// double-precision values are "unordered" with respect to each other if one
+/// or both values are NaN.
///
/// \headerfile <x86intrin.h>
///
@@ -839,8 +858,9 @@ _mm_cmpunord_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unequal to the corresponding value in
-/// the second parameter. The comparison yields 0h for false,
-/// FFFFFFFFFFFFFFFFh for true.
+/// the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -863,8 +883,9 @@ _mm_cmpneq_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not less than the corresponding
-/// value in the second parameter. The comparison yields 0h for false,
-/// FFFFFFFFFFFFFFFFh for true.
+/// value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -887,8 +908,9 @@ _mm_cmpnlt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not less than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0h
-/// for false, FFFFFFFFFFFFFFFFh for true.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -911,8 +933,9 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not greater than the corresponding
-/// value in the second parameter. The comparison yields 0h for false,
-/// FFFFFFFFFFFFFFFFh for true.
+/// value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -936,8 +959,9 @@ _mm_cmpngt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is not greater than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0h
-/// for false, FFFFFFFFFFFFFFFFh for true.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -982,7 +1006,9 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than the corresponding value in
-/// the second parameter. The comparison yields 0 for false, 1 for true.
+/// the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true.
///
/// \headerfile <x86intrin.h>
///
@@ -1004,8 +1030,9 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0 for
-/// false, 1 for true.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true.
///
/// \headerfile <x86intrin.h>
///
@@ -1027,7 +1054,9 @@ _mm_comile_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than the corresponding value
-/// in the second parameter. The comparison yields 0 for false, 1 for true.
+/// in the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true.
///
/// \headerfile <x86intrin.h>
///
@@ -1049,8 +1078,9 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0 for
-/// false, 1 for true.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true.
///
/// \headerfile <x86intrin.h>
///
@@ -1072,7 +1102,9 @@ _mm_comige_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unequal to the corresponding value in
-/// the second parameter. The comparison yields 0 for false, 1 for true.
+/// the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true.
///
/// \headerfile <x86intrin.h>
///
@@ -1093,8 +1125,9 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] for equality. The
-/// comparison yields 0 for false, 1 for true. If either of the two lower
-/// double-precision values is NaN, 1 is returned.
+/// comparison yields 0 for false, 1 for true.
+///
+/// If either of the two lower double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
@@ -1117,8 +1150,10 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than the corresponding value in
-/// the second parameter. The comparison yields 0 for false, 1 for true. If
-/// either of the two lower double-precision values is NaN, 1 is returned.
+/// the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true. If either of the two lower
+/// double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
@@ -1141,9 +1176,10 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is less than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0 for
-/// false, 1 for true. If either of the two lower double-precision values is
-/// NaN, 1 is returned.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true. If either of the two lower
+/// double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
@@ -1166,8 +1202,10 @@ _mm_ucomile_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than the corresponding value
-/// in the second parameter. The comparison yields 0 for false, 1 for true.
-/// If either of the two lower double-precision values is NaN, 0 is returned.
+/// in the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true. If either of the two lower
+/// double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@@ -1190,9 +1228,10 @@ _mm_ucomigt_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is greater than or equal to the
-/// corresponding value in the second parameter. The comparison yields 0 for
-/// false, 1 for true. If either of the two lower double-precision values
-/// is NaN, 0 is returned.
+/// corresponding value in the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@@ -1215,8 +1254,10 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] to determine if
/// the value in the first parameter is unequal to the corresponding value in
-/// the second parameter. The comparison yields 0 for false, 1 for true. If
-/// either of the two lower double-precision values is NaN, 0 is returned.
+/// the second parameter.
+///
+/// The comparison yields 0 for false, 1 for true. If either of the two lower
+/// double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@@ -1278,8 +1319,9 @@ _mm_cvtps_pd(__m128 __a)
/// \brief Converts the lower two integer elements of a 128-bit vector of
/// [4 x i32] into two double-precision floating-point values, returned in a
-/// 128-bit vector of [2 x double]. The upper two elements of the input
-/// vector are unused.
+/// 128-bit vector of [2 x double].
+///
+/// The upper two elements of the input vector are unused.
///
/// \headerfile <x86intrin.h>
///
@@ -1287,7 +1329,9 @@ _mm_cvtps_pd(__m128 __a)
///
/// \param __a
/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are
-/// converted to double-precision values. The upper two elements are unused.
+/// converted to double-precision values.
+///
+/// The upper two elements are unused.
/// \returns A 128-bit vector of [2 x double] containing the converted values.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtepi32_pd(__m128i __a)
@@ -1409,10 +1453,11 @@ _mm_cvtss_sd(__m128d __a, __m128 __b)
/// \brief Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
-/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. If the
-/// result of either conversion is inexact, the result is truncated (rounded
-/// towards zero) regardless of the current MXCSR setting. The upper 64 bits
-/// of the result vector are set to zero.
+/// returned in the lower 64 bits of a 128-bit vector of [4 x i32].
+///
+/// If the result of either conversion is inexact, the result is truncated
+/// (rounded towards zero) regardless of the current MXCSR setting. The upper
+/// 64 bits of the result vector are set to zero.
///
/// \headerfile <x86intrin.h>
///
@@ -1466,9 +1511,10 @@ _mm_cvtpd_pi32(__m128d __a)
/// \brief Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
-/// returned in a 64-bit vector of [2 x i32]. If the result of either
-/// conversion is inexact, the result is truncated (rounded towards zero)
-/// regardless of the current MXCSR setting.
+/// returned in a 64-bit vector of [2 x i32].
+///
+/// If the result of either conversion is inexact, the result is truncated
+/// (rounded towards zero) regardless of the current MXCSR setting.
///
/// \headerfile <x86intrin.h>
///
@@ -1599,6 +1645,17 @@ _mm_loadu_pd(double const *__dp)
return ((struct __loadu_pd*)__dp)->__v;
}
+/// \brief Loads a 64-bit integer value to the low element of a 128-bit integer
+/// vector and clears the upper element.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
+///
+/// \param __a
+/// A pointer to a 64-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \returns A 128-bit vector of [2 x i64] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si64(void const *__a)
{
@@ -1609,6 +1666,17 @@ _mm_loadu_si64(void const *__a)
return (__m128i){__u, 0L};
}
+/// \brief Loads a 64-bit double-precision value to the low element of a
+/// 128-bit integer vector and clears the upper element.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
+///
+/// \param __dp
+/// A pointer to a memory location containing a double-precision value.
+/// The address of the memory location does not have to be aligned.
+/// \returns A 128-bit vector of [2 x double] containing the loaded value.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_load_sd(double const *__dp)
{
@@ -1728,6 +1796,24 @@ _mm_set1_pd(double __w)
return (__m128d){ __w, __w };
}
+/// \brief Constructs a 128-bit floating-point vector of [2 x double], with each
+/// of the two double-precision floating-point vector elements set to the
+/// specified double-precision floating-point value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.
+///
+/// \param __w
+/// A double-precision floating-point value used to initialize each vector
+/// element of the result.
+/// \returns An initialized 128-bit floating-point vector of [2 x double].
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_set_pd1(double __w)
+{
+ return _mm_set1_pd(__w);
+}
+
/// \brief Constructs a 128-bit floating-point vector of [2 x double]
/// initialized with the specified double-precision floating-point values.
///
@@ -1787,7 +1873,7 @@ _mm_setzero_pd(void)
/// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
-//
+///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
@@ -1825,12 +1911,38 @@ _mm_store_sd(double *__dp, __m128d __a)
((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
}
+/// \brief Moves packed double-precision values from a 128-bit vector of
+/// [2 x double] to a memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.
+///
+/// \param __dp
+/// A pointer to an aligned memory location that can store two
+/// double-precision values.
+/// \param __a
+/// A packed 128-bit vector of [2 x double] containing the values to be
+/// moved.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_pd(double *__dp, __m128d __a)
{
*(__m128d*)__dp = __a;
}
+/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
+/// the upper and lower 64 bits of a memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c>VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
+///
+/// \param __dp
+/// A pointer to a memory location that can store two double-precision
+/// values.
+/// \param __a
+/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
+/// of the values in \a dp.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store1_pd(double *__dp, __m128d __a)
{
@@ -1940,8 +2052,9 @@ _mm_storel_pd(double *__dp, __m128d __a)
/// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8],
/// saving the lower 8 bits of each sum in the corresponding element of a
-/// 128-bit result vector of [16 x i8]. The integer elements of both
-/// parameters can be either signed or unsigned.
+/// 128-bit result vector of [16 x i8].
+///
+/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
@@ -1961,8 +2074,9 @@ _mm_add_epi8(__m128i __a, __m128i __b)
/// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16],
/// saving the lower 16 bits of each sum in the corresponding element of a
-/// 128-bit result vector of [8 x i16]. The integer elements of both
-/// parameters can be either signed or unsigned.
+/// 128-bit result vector of [8 x i16].
+///
+/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
@@ -1982,8 +2096,9 @@ _mm_add_epi16(__m128i __a, __m128i __b)
/// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32],
/// saving the lower 32 bits of each sum in the corresponding element of a
-/// 128-bit result vector of [4 x i32]. The integer elements of both
-/// parameters can be either signed or unsigned.
+/// 128-bit result vector of [4 x i32].
+///
+/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
@@ -2021,8 +2136,9 @@ _mm_add_si64(__m64 __a, __m64 __b)
/// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64],
/// saving the lower 64 bits of each sum in the corresponding element of a
-/// 128-bit result vector of [2 x i64]. The integer elements of both
-/// parameters can be either signed or unsigned.
+/// 128-bit result vector of [2 x i64].
+///
+/// The integer elements of both parameters can be either signed or unsigned.
///
/// \headerfile <x86intrin.h>
///
@@ -2168,10 +2284,12 @@ _mm_avg_epu16(__m128i __a, __m128i __b)
/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
/// vectors, producing eight intermediate 32-bit signed integer products, and
/// adds the consecutive pairs of 32-bit products to form a 128-bit signed
-/// [4 x i32] vector. For example, bits [15:0] of both parameters are
-/// multiplied producing a 32-bit product, bits [31:16] of both parameters
-/// are multiplied producing a 32-bit product, and the sum of those two
-/// products becomes bits [31:0] of the result.
+/// [4 x i32] vector.
+///
+/// For example, bits [15:0] of both parameters are multiplied producing a
+/// 32-bit product, bits [31:16] of both parameters are multiplied producing
+/// a 32-bit product, and the sum of those two products becomes bits [31:0]
+/// of the result.
///
/// \headerfile <x86intrin.h>
///
@@ -2369,7 +2487,7 @@ _mm_mul_epu32(__m128i __a, __m128i __b)
/// \brief Computes the absolute differences of corresponding 8-bit integer
/// values in two 128-bit vectors. Sums the first 8 absolute differences, and
-/// separately sums the second 8 absolute differences. Packss these two
+/// separately sums the second 8 absolute differences. Packs these two
/// unsigned 16-bit integer sums into the upper and lower elements of a
/// [2 x i64] vector.
///
@@ -3106,8 +3224,9 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
/// \brief Compares each of the corresponding signed 16-bit values of the
/// 128-bit integer vectors to determine if the values in the first operand
-/// are greater than those in the second operand. Each comparison yields 0h
-/// for false, FFFFh for true.
+/// are greater than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -3126,8 +3245,9 @@ _mm_cmpgt_epi16(__m128i __a, __m128i __b)
/// \brief Compares each of the corresponding signed 32-bit values of the
/// 128-bit integer vectors to determine if the values in the first operand
-/// are greater than those in the second operand. Each comparison yields 0h
-/// for false, FFFFFFFFh for true.
+/// are greater than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -3146,8 +3266,9 @@ _mm_cmpgt_epi32(__m128i __a, __m128i __b)
/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
/// integer vectors to determine if the values in the first operand are less
-/// than those in the second operand. Each comparison yields 0h for false,
-/// FFh for true.
+/// than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -3166,8 +3287,9 @@ _mm_cmplt_epi8(__m128i __a, __m128i __b)
/// \brief Compares each of the corresponding signed 16-bit values of the
/// 128-bit integer vectors to determine if the values in the first operand
-/// are less than those in the second operand. Each comparison yields 0h for
-/// false, FFFFh for true.
+/// are less than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -3186,8 +3308,9 @@ _mm_cmplt_epi16(__m128i __a, __m128i __b)
/// \brief Compares each of the corresponding signed 32-bit values of the
/// 128-bit integer vectors to determine if the values in the first operand
-/// are less than those in the second operand. Each comparison yields 0h for
-/// false, FFFFFFFFh for true.
+/// are less than those in the second operand.
+///
+/// Each comparison yields 0h for false, FFFFFFFFh for true.
///
/// \headerfile <x86intrin.h>
///
@@ -3885,10 +4008,11 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
/// \brief Moves bytes selected by the mask from the first operand to the
/// specified unaligned memory location. When a mask bit is 1, the
-/// corresponding byte is written, otherwise it is not written. To minimize
-/// caching, the date is flagged as non-temporal (unlikely to be used again
-/// soon). Exception and trap behavior for elements not selected for storage
-/// to memory are implementation dependent.
+/// corresponding byte is written, otherwise it is not written.
+///
+/// To minimize caching, the date is flagged as non-temporal (unlikely to be
+/// used again soon). Exception and trap behavior for elements not selected
+/// for storage to memory are implementation dependent.
///
/// \headerfile <x86intrin.h>
///
@@ -3932,8 +4056,10 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
}
/// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit
-/// aligned memory location. To minimize caching, the data is flagged as
-/// non-temporal (unlikely to be used again soon).
+/// aligned memory location.
+///
+/// To minimize caching, the data is flagged as non-temporal (unlikely to be
+/// used again soon).
///
/// \headerfile <x86intrin.h>
///
@@ -3950,6 +4076,7 @@ _mm_stream_pd(double *__p, __m128d __a)
}
/// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location.
+///
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
@@ -3967,8 +4094,9 @@ _mm_stream_si128(__m128i *__p, __m128i __a)
__builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
}
-/// \brief Stores a 32-bit integer value in the specified memory location. To
-/// minimize caching, the data is flagged as non-temporal (unlikely to be
+/// \brief Stores a 32-bit integer value in the specified memory location.
+///
+/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// \headerfile <x86intrin.h>
@@ -3986,8 +4114,9 @@ _mm_stream_si32(int *__p, int __a)
}
#ifdef __x86_64__
-/// \brief Stores a 64-bit integer value in the specified memory location. To
-/// minimize caching, the data is flagged as non-temporal (unlikely to be
+/// \brief Stores a 64-bit integer value in the specified memory location.
+///
+/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// \headerfile <x86intrin.h>
@@ -4019,7 +4148,7 @@ extern "C" {
/// \param __p
/// A pointer to the memory location used to identify the cache line to be
/// flushed.
-void _mm_clflush(void const *);
+void _mm_clflush(void const * __p);
/// \brief Forces strong memory ordering (serialization) between load
/// instructions preceding this instruction and load instructions following
@@ -4141,7 +4270,7 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
/// \param __a
/// A 128-bit integer vector.
/// \param __imm
-/// An immediate value. Bits [3:0] selects values from \a __a to be assigned
+/// An immediate value. Bits [2:0] selects values from \a __a to be assigned
/// to bits[15:0] of the result. \n
/// 000: assign values from bits [15:0] of \a __a. \n
/// 001: assign values from bits [31:16] of \a __a. \n
@@ -4788,4 +4917,12 @@ void _mm_pause(void);
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
+#define _MM_DENORMALS_ZERO_ON (0x0040)
+#define _MM_DENORMALS_ZERO_OFF (0x0000)
+
+#define _MM_DENORMALS_ZERO_MASK (0x0040)
+
+#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
+#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
+
#endif /* __EMMINTRIN_H */
OpenPOWER on IntegriCloud