diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers/emmintrin.h')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Headers/emmintrin.h | 365 |
1 files changed, 251 insertions, 114 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h index 1512f9f..709815c 100644 --- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h @@ -302,7 +302,7 @@ _mm_min_pd(__m128d __a, __m128d __b) return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } -/// \brief Compares lower 64-bits double-precision values of both operands, and +/// \brief Compares lower 64-bit double-precision values of both operands, and /// returns the greater of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper double- /// precision value of the first operand. @@ -462,8 +462,9 @@ _mm_cmplt_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are less than or equal to those in the second operand. Each -/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are less than or equal to those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -482,8 +483,9 @@ _mm_cmple_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are greater than those in the second operand. Each comparison -/// yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are greater than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -502,8 +504,9 @@ _mm_cmpgt_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are greater than or equal to those in the second operand. Each -/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are greater than or equal to those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -522,9 +525,10 @@ _mm_cmpge_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are ordered with respect to those in the second operand. A pair -/// of double-precision values are "ordered" with respect to each other if -/// neither value is a NaN. Each comparison yields 0h for false, +/// operand are ordered with respect to those in the second operand. +/// +/// A pair of double-precision values are "ordered" with respect to each +/// other if neither value is a NaN. Each comparison yields 0h for false, /// FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> @@ -544,9 +548,10 @@ _mm_cmpord_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are unordered with respect to those in the second operand. A pair -/// of double-precision values are "unordered" with respect to each other if -/// one or both values are NaN. Each comparison yields 0h for false, +/// operand are unordered with respect to those in the second operand. +/// +/// A pair of double-precision values are "unordered" with respect to each +/// other if one or both values are NaN. Each comparison yields 0h for false, /// FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> @@ -567,8 +572,9 @@ _mm_cmpunord_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are unequal to those in the second operand. Each comparison -/// yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are unequal to those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -587,8 +593,9 @@ _mm_cmpneq_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are not less than those in the second operand. Each comparison -/// yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are not less than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -607,8 +614,9 @@ _mm_cmpnlt_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are not less than or equal to those in the second operand. Each -/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are not less than or equal to those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -627,8 +635,9 @@ _mm_cmpnle_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first -/// operand are not greater than those in the second operand. Each -/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// operand are not greater than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -648,6 +657,7 @@ _mm_cmpngt_pd(__m128d __a, __m128d __b) /// \brief Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. +/// /// Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> @@ -666,8 +676,9 @@ _mm_cmpnge_pd(__m128d __a, __m128d __b) } /// \brief Compares the lower double-precision floating-point values in each of -/// the two 128-bit floating-point vectors of [2 x double] for equality. The -/// comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. +/// the two 128-bit floating-point vectors of [2 x double] for equality. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -690,8 +701,9 @@ _mm_cmpeq_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in -/// the second parameter. The comparison yields 0h for false, -/// FFFFFFFFFFFFFFFFh for true. +/// the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -714,8 +726,9 @@ _mm_cmplt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the -/// corresponding value in the second parameter. The comparison yields 0h for -/// false, FFFFFFFFFFFFFFFFh for true. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -738,8 +751,9 @@ _mm_cmple_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value -/// in the second parameter. The comparison yields 0h for false, -/// FFFFFFFFFFFFFFFFh for true. +/// in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -763,8 +777,9 @@ _mm_cmpgt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the -/// corresponding value in the second parameter. The comparison yields 0h for -/// false, FFFFFFFFFFFFFFFFh for true. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -788,9 +803,11 @@ _mm_cmpge_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "ordered" with respect to the -/// corresponding value in the second parameter. The comparison yields 0h for -/// false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are -/// "ordered" with respect to each other if neither value is a NaN. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of +/// double-precision values are "ordered" with respect to each other if +/// neither value is a NaN. /// /// \headerfile <x86intrin.h> /// @@ -813,9 +830,11 @@ _mm_cmpord_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "unordered" with respect to the -/// corresponding value in the second parameter. The comparison yields 0h -/// for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values -/// are "unordered" with respect to each other if one or both values are NaN. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of +/// double-precision values are "unordered" with respect to each other if one +/// or both values are NaN. /// /// \headerfile <x86intrin.h> /// @@ -839,8 +858,9 @@ _mm_cmpunord_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in -/// the second parameter. The comparison yields 0h for false, -/// FFFFFFFFFFFFFFFFh for true. +/// the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -863,8 +883,9 @@ _mm_cmpneq_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than the corresponding -/// value in the second parameter. The comparison yields 0h for false, -/// FFFFFFFFFFFFFFFFh for true. +/// value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -887,8 +908,9 @@ _mm_cmpnlt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than or equal to the -/// corresponding value in the second parameter. The comparison yields 0h -/// for false, FFFFFFFFFFFFFFFFh for true. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -911,8 +933,9 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than the corresponding -/// value in the second parameter. The comparison yields 0h for false, -/// FFFFFFFFFFFFFFFFh for true. +/// value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -936,8 +959,9 @@ _mm_cmpngt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than or equal to the -/// corresponding value in the second parameter. The comparison yields 0h -/// for false, FFFFFFFFFFFFFFFFh for true. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -982,7 +1006,9 @@ _mm_comieq_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in -/// the second parameter. The comparison yields 0 for false, 1 for true. +/// the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. /// /// \headerfile <x86intrin.h> /// @@ -1004,8 +1030,9 @@ _mm_comilt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the -/// corresponding value in the second parameter. The comparison yields 0 for -/// false, 1 for true. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. /// /// \headerfile <x86intrin.h> /// @@ -1027,7 +1054,9 @@ _mm_comile_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value -/// in the second parameter. The comparison yields 0 for false, 1 for true. +/// in the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. /// /// \headerfile <x86intrin.h> /// @@ -1049,8 +1078,9 @@ _mm_comigt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the -/// corresponding value in the second parameter. The comparison yields 0 for -/// false, 1 for true. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. /// /// \headerfile <x86intrin.h> /// @@ -1072,7 +1102,9 @@ _mm_comige_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in -/// the second parameter. The comparison yields 0 for false, 1 for true. +/// the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. /// /// \headerfile <x86intrin.h> /// @@ -1093,8 +1125,9 @@ _mm_comineq_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. The -/// comparison yields 0 for false, 1 for true. If either of the two lower -/// double-precision values is NaN, 1 is returned. +/// comparison yields 0 for false, 1 for true. +/// +/// If either of the two lower double-precision values is NaN, 1 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1117,8 +1150,10 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in -/// the second parameter. The comparison yields 0 for false, 1 for true. If -/// either of the two lower double-precision values is NaN, 1 is returned. +/// the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. If either of the two lower +/// double-precision values is NaN, 1 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1141,9 +1176,10 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the -/// corresponding value in the second parameter. The comparison yields 0 for -/// false, 1 for true. If either of the two lower double-precision values is -/// NaN, 1 is returned. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. If either of the two lower +/// double-precision values is NaN, 1 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1166,8 +1202,10 @@ _mm_ucomile_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value -/// in the second parameter. The comparison yields 0 for false, 1 for true. -/// If either of the two lower double-precision values is NaN, 0 is returned. +/// in the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. If either of the two lower +/// double-precision values is NaN, 0 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1190,9 +1228,10 @@ _mm_ucomigt_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the -/// corresponding value in the second parameter. The comparison yields 0 for -/// false, 1 for true. If either of the two lower double-precision values -/// is NaN, 0 is returned. +/// corresponding value in the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. If either of the two +/// lower double-precision values is NaN, 0 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1215,8 +1254,10 @@ _mm_ucomige_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in -/// the second parameter. The comparison yields 0 for false, 1 for true. If -/// either of the two lower double-precision values is NaN, 0 is returned. +/// the second parameter. +/// +/// The comparison yields 0 for false, 1 for true. If either of the two lower +/// double-precision values is NaN, 0 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1278,8 +1319,9 @@ _mm_cvtps_pd(__m128 __a) /// \brief Converts the lower two integer elements of a 128-bit vector of /// [4 x i32] into two double-precision floating-point values, returned in a -/// 128-bit vector of [2 x double]. The upper two elements of the input -/// vector are unused. +/// 128-bit vector of [2 x double]. +/// +/// The upper two elements of the input vector are unused. /// /// \headerfile <x86intrin.h> /// @@ -1287,7 +1329,9 @@ _mm_cvtps_pd(__m128 __a) /// /// \param __a /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are -/// converted to double-precision values. The upper two elements are unused. +/// converted to double-precision values. +/// +/// The upper two elements are unused. /// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) @@ -1409,10 +1453,11 @@ _mm_cvtss_sd(__m128d __a, __m128 __b) /// \brief Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, -/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. If the -/// result of either conversion is inexact, the result is truncated (rounded -/// towards zero) regardless of the current MXCSR setting. The upper 64 bits -/// of the result vector are set to zero. +/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. +/// +/// If the result of either conversion is inexact, the result is truncated +/// (rounded towards zero) regardless of the current MXCSR setting. The upper +/// 64 bits of the result vector are set to zero. /// /// \headerfile <x86intrin.h> /// @@ -1466,9 +1511,10 @@ _mm_cvtpd_pi32(__m128d __a) /// \brief Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, -/// returned in a 64-bit vector of [2 x i32]. If the result of either -/// conversion is inexact, the result is truncated (rounded towards zero) -/// regardless of the current MXCSR setting. +/// returned in a 64-bit vector of [2 x i32]. +/// +/// If the result of either conversion is inexact, the result is truncated +/// (rounded towards zero) regardless of the current MXCSR setting. /// /// \headerfile <x86intrin.h> /// @@ -1599,6 +1645,17 @@ _mm_loadu_pd(double const *__dp) return ((struct __loadu_pd*)__dp)->__v; } +/// \brief Loads a 64-bit integer value to the low element of a 128-bit integer +/// vector and clears the upper element. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. +/// +/// \param __a +/// A pointer to a 64-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns A 128-bit vector of [2 x i64] containing the loaded value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) { @@ -1609,6 +1666,17 @@ _mm_loadu_si64(void const *__a) return (__m128i){__u, 0L}; } +/// \brief Loads a 64-bit double-precision value to the low element of a +/// 128-bit integer vector and clears the upper element. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. +/// +/// \param __dp +/// A pointer to a memory location containing a double-precision value. +/// The address of the memory location does not have to be aligned. +/// \returns A 128-bit vector of [2 x double] containing the loaded value. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { @@ -1728,6 +1796,24 @@ _mm_set1_pd(double __w) return (__m128d){ __w, __w }; } +/// \brief Constructs a 128-bit floating-point vector of [2 x double], with each +/// of the two double-precision floating-point vector elements set to the +/// specified double-precision floating-point value. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. +/// +/// \param __w +/// A double-precision floating-point value used to initialize each vector +/// element of the result. +/// \returns An initialized 128-bit floating-point vector of [2 x double]. +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_set_pd1(double __w) +{ + return _mm_set1_pd(__w); +} + /// \brief Constructs a 128-bit floating-point vector of [2 x double] /// initialized with the specified double-precision floating-point values. /// @@ -1787,7 +1873,7 @@ _mm_setzero_pd(void) /// \brief Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits are set to the lower 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. -// +/// /// \headerfile <x86intrin.h> /// /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. @@ -1825,12 +1911,38 @@ _mm_store_sd(double *__dp, __m128d __a) ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; } +/// \brief Moves packed double-precision values from a 128-bit vector of +/// [2 x double] to a memory location. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. +/// +/// \param __dp +/// A pointer to an aligned memory location that can store two +/// double-precision values. +/// \param __a +/// A packed 128-bit vector of [2 x double] containing the values to be +/// moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a) { *(__m128d*)__dp = __a; } +/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to +/// the upper and lower 64 bits of a memory location. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c>VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. +/// +/// \param __dp +/// A pointer to a memory location that can store two double-precision +/// values. +/// \param __a +/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each +/// of the values in \a dp. static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { @@ -1940,8 +2052,9 @@ _mm_storel_pd(double *__dp, __m128d __a) /// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8], /// saving the lower 8 bits of each sum in the corresponding element of a -/// 128-bit result vector of [16 x i8]. The integer elements of both -/// parameters can be either signed or unsigned. +/// 128-bit result vector of [16 x i8]. +/// +/// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile <x86intrin.h> /// @@ -1961,8 +2074,9 @@ _mm_add_epi8(__m128i __a, __m128i __b) /// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16], /// saving the lower 16 bits of each sum in the corresponding element of a -/// 128-bit result vector of [8 x i16]. The integer elements of both -/// parameters can be either signed or unsigned. +/// 128-bit result vector of [8 x i16]. +/// +/// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile <x86intrin.h> /// @@ -1982,8 +2096,9 @@ _mm_add_epi16(__m128i __a, __m128i __b) /// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32], /// saving the lower 32 bits of each sum in the corresponding element of a -/// 128-bit result vector of [4 x i32]. The integer elements of both -/// parameters can be either signed or unsigned. +/// 128-bit result vector of [4 x i32]. +/// +/// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile <x86intrin.h> /// @@ -2021,8 +2136,9 @@ _mm_add_si64(__m64 __a, __m64 __b) /// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64], /// saving the lower 64 bits of each sum in the corresponding element of a -/// 128-bit result vector of [2 x i64]. The integer elements of both -/// parameters can be either signed or unsigned. +/// 128-bit result vector of [2 x i64]. +/// +/// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile <x86intrin.h> /// @@ -2168,10 +2284,12 @@ _mm_avg_epu16(__m128i __a, __m128i __b) /// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16] /// vectors, producing eight intermediate 32-bit signed integer products, and /// adds the consecutive pairs of 32-bit products to form a 128-bit signed -/// [4 x i32] vector. For example, bits [15:0] of both parameters are -/// multiplied producing a 32-bit product, bits [31:16] of both parameters -/// are multiplied producing a 32-bit product, and the sum of those two -/// products becomes bits [31:0] of the result. +/// [4 x i32] vector. +/// +/// For example, bits [15:0] of both parameters are multiplied producing a +/// 32-bit product, bits [31:16] of both parameters are multiplied producing +/// a 32-bit product, and the sum of those two products becomes bits [31:0] +/// of the result. /// /// \headerfile <x86intrin.h> /// @@ -2369,7 +2487,7 @@ _mm_mul_epu32(__m128i __a, __m128i __b) /// \brief Computes the absolute differences of corresponding 8-bit integer /// values in two 128-bit vectors. Sums the first 8 absolute differences, and -/// separately sums the second 8 absolute differences. Packss these two +/// separately sums the second 8 absolute differences. Packs these two /// unsigned 16-bit integer sums into the upper and lower elements of a /// [2 x i64] vector. /// @@ -3106,8 +3224,9 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b) /// \brief Compares each of the corresponding signed 16-bit values of the /// 128-bit integer vectors to determine if the values in the first operand -/// are greater than those in the second operand. Each comparison yields 0h -/// for false, FFFFh for true. +/// are greater than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -3126,8 +3245,9 @@ _mm_cmpgt_epi16(__m128i __a, __m128i __b) /// \brief Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand -/// are greater than those in the second operand. Each comparison yields 0h -/// for false, FFFFFFFFh for true. +/// are greater than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -3146,8 +3266,9 @@ _mm_cmpgt_epi32(__m128i __a, __m128i __b) /// \brief Compares each of the corresponding signed 8-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are less -/// than those in the second operand. Each comparison yields 0h for false, -/// FFh for true. +/// than those in the second operand. +/// +/// Each comparison yields 0h for false, FFh for true. /// /// \headerfile <x86intrin.h> /// @@ -3166,8 +3287,9 @@ _mm_cmplt_epi8(__m128i __a, __m128i __b) /// \brief Compares each of the corresponding signed 16-bit values of the /// 128-bit integer vectors to determine if the values in the first operand -/// are less than those in the second operand. Each comparison yields 0h for -/// false, FFFFh for true. +/// are less than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -3186,8 +3308,9 @@ _mm_cmplt_epi16(__m128i __a, __m128i __b) /// \brief Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand -/// are less than those in the second operand. Each comparison yields 0h for -/// false, FFFFFFFFh for true. +/// are less than those in the second operand. +/// +/// Each comparison yields 0h for false, FFFFFFFFh for true. /// /// \headerfile <x86intrin.h> /// @@ -3885,10 +4008,11 @@ _mm_storeu_si128(__m128i *__p, __m128i __b) /// \brief Moves bytes selected by the mask from the first operand to the /// specified unaligned memory location. When a mask bit is 1, the -/// corresponding byte is written, otherwise it is not written. To minimize -/// caching, the date is flagged as non-temporal (unlikely to be used again -/// soon). Exception and trap behavior for elements not selected for storage -/// to memory are implementation dependent. +/// corresponding byte is written, otherwise it is not written. +/// +/// To minimize caching, the date is flagged as non-temporal (unlikely to be +/// used again soon). Exception and trap behavior for elements not selected +/// for storage to memory are implementation dependent. /// /// \headerfile <x86intrin.h> /// @@ -3932,8 +4056,10 @@ _mm_storel_epi64(__m128i *__p, __m128i __a) } /// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit -/// aligned memory location. To minimize caching, the data is flagged as -/// non-temporal (unlikely to be used again soon). +/// aligned memory location. +/// +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). /// /// \headerfile <x86intrin.h> /// @@ -3950,6 +4076,7 @@ _mm_stream_pd(double *__p, __m128d __a) } /// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location. +/// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// @@ -3967,8 +4094,9 @@ _mm_stream_si128(__m128i *__p, __m128i __a) __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p); } -/// \brief Stores a 32-bit integer value in the specified memory location. To -/// minimize caching, the data is flagged as non-temporal (unlikely to be +/// \brief Stores a 32-bit integer value in the specified memory location. +/// +/// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile <x86intrin.h> @@ -3986,8 +4114,9 @@ _mm_stream_si32(int *__p, int __a) } #ifdef __x86_64__ -/// \brief Stores a 64-bit integer value in the specified memory location. To -/// minimize caching, the data is flagged as non-temporal (unlikely to be +/// \brief Stores a 64-bit integer value in the specified memory location. +/// +/// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile <x86intrin.h> @@ -4019,7 +4148,7 @@ extern "C" { /// \param __p /// A pointer to the memory location used to identify the cache line to be /// flushed. -void _mm_clflush(void const *); +void _mm_clflush(void const * __p); /// \brief Forces strong memory ordering (serialization) between load /// instructions preceding this instruction and load instructions following @@ -4141,7 +4270,7 @@ _mm_packus_epi16(__m128i __a, __m128i __b) /// \param __a /// A 128-bit integer vector. /// \param __imm -/// An immediate value. Bits [3:0] selects values from \a __a to be assigned +/// An immediate value. Bits [2:0] selects values from \a __a to be assigned /// to bits[15:0] of the result. \n /// 000: assign values from bits [15:0] of \a __a. \n /// 001: assign values from bits [31:16] of \a __a. \n @@ -4788,4 +4917,12 @@ void _mm_pause(void); #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#define _MM_DENORMALS_ZERO_ON (0x0040) +#define _MM_DENORMALS_ZERO_OFF (0x0000) + +#define _MM_DENORMALS_ZERO_MASK (0x0040) + +#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) +#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) + #endif /* __EMMINTRIN_H */ |