diff options
Diffstat (limited to 'lib/Headers')
-rw-r--r-- | lib/Headers/CMakeLists.txt | 30 | ||||
-rw-r--r-- | lib/Headers/avxintrin.h | 42 | ||||
-rw-r--r-- | lib/Headers/emmintrin.h | 10 | ||||
-rw-r--r-- | lib/Headers/mm3dnow.h | 161 | ||||
-rw-r--r-- | lib/Headers/mm_malloc.h | 2 | ||||
-rw-r--r-- | lib/Headers/stddef.h | 5 | ||||
-rw-r--r-- | lib/Headers/stdint.h | 37 | ||||
-rw-r--r-- | lib/Headers/xmmintrin.h | 2 |
8 files changed, 226 insertions, 63 deletions
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 1e9eacc..78fd6f1 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -1,22 +1,28 @@ set(files altivec.h avxintrin.h - emmintrin.h - float.h + emmintrin.h + float.h immintrin.h - iso646.h - limits.h - mm_malloc.h - mmintrin.h - pmmintrin.h + iso646.h + limits.h + mm3dnow.h + mmintrin.h + mm_malloc.h + nmmintrin.h + pmmintrin.h smmintrin.h - stdarg.h - stdbool.h - stddef.h - stdint.h + stdarg.h + stdbool.h + stddef.h + stdint.h tgmath.h tmmintrin.h - xmmintrin.h) + varargs.h + wmmintrin.h + x86intrin.h + xmmintrin.h + ) set(output_dir ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/include) diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index 884d31c..2eb2f85 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -385,41 +385,23 @@ _mm256_dp_ps(__m256 a, __m256 b, const int c) #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ -static __inline __m128d __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_pd(__m128d a, __m128d b, const int c) -{ - return (__m128d)__builtin_ia32_cmppd((__v2df)a, (__v2df)b, c); -} +#define _mm_cmp_pd(a, b, c) \ + (__m128d)__builtin_ia32_cmppd((__v2df)(a), (__v2df)(b), (c)) -static __inline __m128 __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_ps(__m128 a, __m128 b, const int c) -{ - return (__m128)__builtin_ia32_cmpps((__v4sf)a, (__v4sf)b, c); -} +#define _mm_cmp_ps(a, b, c) \ + (__m128)__builtin_ia32_cmpps((__v4sf)(a), (__v4sf)(b), (c)) -static __inline __m256d __attribute__((__always_inline__, __nodebug__)) -_mm256_cmp_pd(__m256d a, __m256d b, const int c) -{ - return (__m256d)__builtin_ia32_cmppd256((__v4df)a, (__v4df)b, c); -} +#define _mm256_cmp_pd(a, b, c) \ + (__m256d)__builtin_ia32_cmppd256((__v4df)(a), (__v4df)(b), (c)) -static __inline __m256 __attribute__((__always_inline__, __nodebug__)) -_mm256_cmp_ps(__m256 a, __m256 b, const int c) -{ - return (__m256)__builtin_ia32_cmpps256((__v8sf)a, (__v8sf)b, c); -} +#define _mm256_cmp_ps(a, b, c) \ + (__m256)__builtin_ia32_cmpps256((__v8sf)(a), (__v8sf)(b), (c)) -static __inline __m128d __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_sd(__m128d a, __m128d b, const int c) -{ - return (__m128d)__builtin_ia32_cmpsd((__v2df)a, (__v2df)b, c); -} +#define _mm_cmp_sd(a, b, c) \ + (__m128d)__builtin_ia32_cmpsd((__v2df)(a), (__v2df)(b), (c)) -static __inline __m128 __attribute__((__always_inline__, __nodebug__)) -_mm_cmp_ss(__m128 a, __m128 b, const int c) -{ - return (__m128)__builtin_ia32_cmpss((__v4sf)a, (__v4sf)b, c); -} +#define _mm_cmp_ss(a, b, c) \ + (__m128)__builtin_ia32_cmpss((__v4sf)(a), (__v4sf)(b), (c)) /* Vector extract */ static __inline __m128d __attribute__((__always_inline__, __nodebug__)) diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index 11b2581..0c1d730 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -466,7 +466,7 @@ _mm_loadr_pd(double const *dp) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_loadu_pd(double const *dp) { - return __builtin_ia32_loadupd(dp); + return (__m128d){ dp[0], dp[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) @@ -1210,16 +1210,18 @@ _mm_movemask_epi8(__m128i a) } #define _mm_shuffle_epi32(a, imm) \ - ((__m128i)__builtin_shufflevector((__v4si)(a), (__v4si) {0}, \ + ((__m128i)__builtin_shufflevector((__v4si)(a), (__v4si) _mm_set1_epi32(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6)) + + #define _mm_shufflelo_epi16(a, imm) \ - ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) {0}, \ + ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) _mm_set1_epi16(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 4, 5, 6, 7)) #define _mm_shufflehi_epi16(a, imm) \ - ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) {0}, 0, 1, 2, 3, \ + ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) _mm_set1_epi16(0), 0, 1, 2, 3, \ 4 + (((imm) & 0x03) >> 0), \ 4 + (((imm) & 0x0c) >> 2), \ 4 + (((imm) & 0x30) >> 4), \ diff --git a/lib/Headers/mm3dnow.h b/lib/Headers/mm3dnow.h new file mode 100644 index 0000000..2f456ad --- /dev/null +++ b/lib/Headers/mm3dnow.h @@ -0,0 +1,161 @@ +/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _MM3DNOW_H_INCLUDED +#define _MM3DNOW_H_INCLUDED + +#include <mmintrin.h> + +typedef float __v2sf __attribute__((__vector_size__(8))); + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_m_femms() { + __builtin_ia32_femms(); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pavgusb(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pf2id(__m64 __m) { + return (__m64)__builtin_ia32_pf2id((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfadd(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfcmpeq(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfcmpge(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfcmpgt(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfmax(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfmin(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfmul(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrcp(__m64 __m) { + return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrcpit1(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrcpit2(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrsqrt(__m64 __m) { + return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrsqrtit1(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrsqrtit1((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfsub(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfsubr(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pi2fd(__m64 __m) { + return (__m64)__builtin_ia32_pi2fd((__v2si)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pmulhrw(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pf2iw(__m64 __m) { + return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfnacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfpnacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pi2fw(__m64 __m) { + return (__m64)__builtin_ia32_pi2fw((__v2si)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pswapdsf(__m64 __m) { + return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pswapdsi(__m64 __m) { + return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); +} + +#endif diff --git a/lib/Headers/mm_malloc.h b/lib/Headers/mm_malloc.h index e7da543..ec92362 100644 --- a/lib/Headers/mm_malloc.h +++ b/lib/Headers/mm_malloc.h @@ -40,6 +40,7 @@ extern "C" int posix_memalign(void **memptr, size_t alignment, size_t size); #endif #endif +#if !(defined(_WIN32) && defined(_mm_malloc)) static __inline__ void *__attribute__((__always_inline__, __nodebug__, __malloc__)) _mm_malloc(size_t size, size_t align) @@ -67,5 +68,6 @@ _mm_free(void *p) { free(p); } +#endif #endif /* __MM_MALLOC_H */ diff --git a/lib/Headers/stddef.h b/lib/Headers/stddef.h index 7cc0bc1..9e87ee89 100644 --- a/lib/Headers/stddef.h +++ b/lib/Headers/stddef.h @@ -26,7 +26,10 @@ #ifndef __STDDEF_H #define __STDDEF_H +#ifndef _PTRDIFF_T +#define _PTRDIFF_T typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t; +#endif #ifndef _SIZE_T #define _SIZE_T typedef __typeof__(sizeof(int)) size_t; @@ -51,7 +54,7 @@ typedef __WCHAR_TYPE__ wchar_t; #endif /* __STDDEF_H */ /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use -__WINT_TYPE__ directly; accomodate both by requiring __need_wint_t */ +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ #if defined(__need_wint_t) #if !defined(_WINT_T) #define _WINT_T diff --git a/lib/Headers/stdint.h b/lib/Headers/stdint.h index 9498ed5..6f1a876 100644 --- a/lib/Headers/stdint.h +++ b/lib/Headers/stdint.h @@ -46,7 +46,7 @@ * and 64-bit widths regardless of whether there are corresponding exact-width * types. * - * To accomodate targets that are missing types that are exactly 8, 16, 32, or + * To accommodate targets that are missing types that are exactly 8, 16, 32, or * 64 bits wide, this implementation takes an approach of cascading * redefintions, redefining __int_leastN_t to successively smaller exact-width * types. It is therefore important that the types are defined in order of @@ -58,7 +58,7 @@ * * In violation of the standard, some targets do not implement a type that is * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit). - * To accomodate these targets, a required minimum-width type is only + * To accommodate these targets, a required minimum-width type is only * defined if there exists an exact-width type of equal or greater width. */ @@ -609,11 +609,15 @@ typedef __UINTMAX_TYPE__ uintmax_t; # define UINT_FAST8_MAX __UINT_LEAST8_MAX #endif /* __INT_LEAST8_MIN */ +/* Some utility macros */ +#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) +#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) +#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) +#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) +#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) + /* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ /* C99 7.18.3 Limits of other integer types. */ -#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) -#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) -#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) #define INTPTR_MIN __INTN_MIN(__INTPTR_WIDTH__) #define INTPTR_MAX __INTN_MAX(__INTPTR_WIDTH__) @@ -630,23 +634,26 @@ typedef __UINTMAX_TYPE__ uintmax_t; /* C99 7.18.3 Limits of other integer types. */ #define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) #define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) -#define WINT_MIN __INTN_MIN(__WINT_WIDTH__) -#define WINT_MAX __INTN_MAX(__WINT_WIDTH__) +#ifdef __WINT_UNSIGNED__ +# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0) +# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__) +#else +# define WINT_MIN __INTN_MIN(__WINT_WIDTH__) +# define WINT_MAX __INTN_MAX(__WINT_WIDTH__) +#endif -/* FIXME: if we ever support a target with unsigned wchar_t, this should be - * 0 .. Max. - */ #ifndef WCHAR_MAX -#define WCHAR_MAX __INTN_MAX(__WCHAR_WIDTH__) +# define WCHAR_MAX __WCHAR_MAX__ #endif #ifndef WCHAR_MIN -#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) +# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__) +# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) +# else +# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0) +# endif #endif /* 7.18.4.2 Macros for greatest-width integer constants. */ -#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) -#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) - #define INTMAX_C(v) __INTN_C(__INTMAX_WIDTH__, v) #define UINTMAX_C(v) __UINTN_C(__INTMAX_WIDTH__, v) diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h index 42dd3e8..00760ed 100644 --- a/lib/Headers/xmmintrin.h +++ b/lib/Headers/xmmintrin.h @@ -539,7 +539,7 @@ _mm_load_ps(const float *p) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_loadu_ps(const float *p) { - return __builtin_ia32_loadups(p); + return (__m128){ p[0], p[1], p[2], p[3] }; } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) |