diff options
Diffstat (limited to 'lib/Headers')
-rw-r--r-- | lib/Headers/emmintrin.h | 14 | ||||
-rw-r--r-- | lib/Headers/mmintrin.h | 8 | ||||
-rw-r--r-- | lib/Headers/xmmintrin.h | 5 |
3 files changed, 18 insertions, 9 deletions
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index 0c1d730..ee12d3c 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -466,7 +466,10 @@ _mm_loadr_pd(double const *dp)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_pd(double const *dp)
 {
-  return (__m128d){ dp[0], dp[1] };
+  struct __loadu_pd {
+    __m128d v;
+  } __attribute__((packed, may_alias));
+  return ((struct __loadu_pd*)dp)->v;
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
@@ -478,13 +481,13 @@ _mm_load_sd(double const *dp)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_loadh_pd(__m128d a, double const *dp)
 {
-  return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2);
+  return (__m128d){ a[0], *dp };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_loadl_pd(__m128d a, double const *dp)
 {
-  return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1);
+  return (__m128d){ *dp, a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
@@ -1011,7 +1014,10 @@ _mm_load_si128(__m128i const *p)
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_si128(__m128i const *p)
 {
-  return (__m128i)__builtin_ia32_loaddqu((char const *)p);
+  struct __loadu_si128 {
+    __m128i v;
+  } __attribute__((packed, may_alias));
+  return ((struct __loadu_si128*)p)->v;
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h
index fefb42f..986870a 100644
--- a/lib/Headers/mmintrin.h
+++ b/lib/Headers/mmintrin.h
@@ -421,20 +421,20 @@ _mm_set1_pi8(char __b)
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
-_mm_setr_pi32(int __i1, int __i0)
+_mm_setr_pi32(int __i0, int __i1)
 {
   return _mm_set_pi32(__i1, __i0);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
-_mm_setr_pi16(short __w3, short __w2, short __w1, short __w0)
+_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
 {
   return _mm_set_pi16(__w3, __w2, __w1, __w0);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
-_mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
-             char __b1, char __b0)
+_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
+             char __b6, char __b7)
 {
   return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
 }
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 00760ed..50f275d 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -539,7 +539,10 @@ _mm_load_ps(const float *p)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_ps(const float *p)
 {
-  return (__m128){ p[0], p[1], p[2], p[3] };
+  struct __loadu_ps {
+    __m128 v;
+  } __attribute__((packed, may_alias));
+  return ((struct __loadu_ps*)p)->v;
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))