8 files changed, 226 insertions, 63 deletions
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 1e9eacc..78fd6f1 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -1,22 +1,28 @@
 set(files
   altivec.h
   avxintrin.h
-  emmintrin.h	
-  float.h		
+  emmintrin.h
+  float.h
   immintrin.h
-  iso646.h	
-  limits.h	
-  mm_malloc.h	
-  mmintrin.h	
-  pmmintrin.h	
+  iso646.h
+  limits.h
+  mm3dnow.h
+  mmintrin.h
+  mm_malloc.h
+  nmmintrin.h
+  pmmintrin.h
   smmintrin.h
-  stdarg.h	
-  stdbool.h	
-  stddef.h	
-  stdint.h	
+  stdarg.h
+  stdbool.h
+  stddef.h
+  stdint.h
   tgmath.h
   tmmintrin.h
-  xmmintrin.h)
+  varargs.h
+  wmmintrin.h
+  x86intrin.h
+  xmmintrin.h
+  )
 
 set(output_dir ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/include)
 
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index 884d31c..2eb2f85 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -385,41 +385,23 @@ _mm256_dp_ps(__m256 a, __m256 b, const int c)
 #define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
 #define _CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
 
-static __inline __m128d __attribute__((__always_inline__, __nodebug__))
-_mm_cmp_pd(__m128d a, __m128d b, const int c)
-{
-  return (__m128d)__builtin_ia32_cmppd((__v2df)a, (__v2df)b, c);
-}
+#define _mm_cmp_pd(a, b, c) \
+  (__m128d)__builtin_ia32_cmppd((__v2df)(a), (__v2df)(b), (c))
 
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
-_mm_cmp_ps(__m128 a, __m128 b, const int c)
-{
-  return (__m128)__builtin_ia32_cmpps((__v4sf)a, (__v4sf)b, c);
-}
+#define _mm_cmp_ps(a, b, c) \
+  (__m128)__builtin_ia32_cmpps((__v4sf)(a), (__v4sf)(b), (c))
 
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
-_mm256_cmp_pd(__m256d a, __m256d b, const int c)
-{
-  return (__m256d)__builtin_ia32_cmppd256((__v4df)a, (__v4df)b, c);
-}
+#define _mm256_cmp_pd(a, b, c) \
+  (__m256d)__builtin_ia32_cmppd256((__v4df)(a), (__v4df)(b), (c))
 
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
-_mm256_cmp_ps(__m256 a, __m256 b, const int c)
-{
-  return (__m256)__builtin_ia32_cmpps256((__v8sf)a, (__v8sf)b, c);
-}
+#define _mm256_cmp_ps(a, b, c) \
+  (__m256)__builtin_ia32_cmpps256((__v8sf)(a), (__v8sf)(b), (c))
 
-static __inline __m128d __attribute__((__always_inline__, __nodebug__))
-_mm_cmp_sd(__m128d a, __m128d b, const int c)
-{
-  return (__m128d)__builtin_ia32_cmpsd((__v2df)a, (__v2df)b, c);
-}
+#define _mm_cmp_sd(a, b, c) \
+  (__m128d)__builtin_ia32_cmpsd((__v2df)(a), (__v2df)(b), (c))
 
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
-_mm_cmp_ss(__m128 a, __m128 b, const int c)
-{
-  return (__m128)__builtin_ia32_cmpss((__v4sf)a, (__v4sf)b, c);
-}
+#define _mm_cmp_ss(a, b, c) \
+  (__m128)__builtin_ia32_cmpss((__v4sf)(a), (__v4sf)(b), (c))
 
 /* Vector extract */
 static __inline __m128d __attribute__((__always_inline__, __nodebug__))
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index 11b2581..0c1d730 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -466,7 +466,7 @@ _mm_loadr_pd(double const *dp)
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_pd(double const *dp)
 {
-  return __builtin_ia32_loadupd(dp);
+  return (__m128d){ dp[0], dp[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
@@ -1210,16 +1210,18 @@ _mm_movemask_epi8(__m128i a)
 }
 
 #define _mm_shuffle_epi32(a, imm) \
-  ((__m128i)__builtin_shufflevector((__v4si)(a), (__v4si) {0}, \
+  ((__m128i)__builtin_shufflevector((__v4si)(a), (__v4si) _mm_set1_epi32(0), \
                                     (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                     ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6))
+
+
 #define _mm_shufflelo_epi16(a, imm) \
-  ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) {0}, \
+  ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) _mm_set1_epi16(0), \
                                     (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                     ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
                                     4, 5, 6, 7))
 #define _mm_shufflehi_epi16(a, imm) \
-  ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) {0}, 0, 1, 2, 3, \
+  ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) _mm_set1_epi16(0), 0, 1, 2, 3, \
                                     4 + (((imm) & 0x03) >> 0), \
                                     4 + (((imm) & 0x0c) >> 2), \
                                     4 + (((imm) & 0x30) >> 4), \
diff --git a/lib/Headers/mm3dnow.h b/lib/Headers/mm3dnow.h
new file mode 100644
index 0000000..2f456ad
--- /dev/null
+++ b/lib/Headers/mm3dnow.h
@@ -0,0 +1,161 @@
+/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _MM3DNOW_H_INCLUDED
+#define _MM3DNOW_H_INCLUDED
+
+#include <mmintrin.h>
+
+typedef float __v2sf __attribute__((__vector_size__(8)));
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_femms() {
+  __builtin_ia32_femms();
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pavgusb(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pf2id(__m64 __m) {
+  return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfacc(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfadd(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfcmpeq(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfcmpge(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfcmpgt(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfmax(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfmin(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfmul(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfrcp(__m64 __m) {
+  return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfrcpit1(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfrcpit2(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfrsqrt(__m64 __m) {
+  return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfrsqrtit1((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfsub(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfsubr(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pi2fd(__m64 __m) {
+  return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pmulhrw(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pf2iw(__m64 __m) {
+  return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfnacc(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pfpnacc(__m64 __m1, __m64 __m2) {
+  return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pi2fw(__m64 __m) {
+  return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pswapdsf(__m64 __m) {
+  return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_m_pswapdsi(__m64 __m) {
+  return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
+}
+
+#endif
diff --git a/lib/Headers/mm_malloc.h b/lib/Headers/mm_malloc.h
index e7da543..ec92362 100644
--- a/lib/Headers/mm_malloc.h
+++ b/lib/Headers/mm_malloc.h
@@ -40,6 +40,7 @@ extern "C" int posix_memalign(void **memptr, size_t alignment, size_t size);
 #endif
 #endif
 
+#if !(defined(_WIN32) && defined(_mm_malloc))
 static __inline__ void *__attribute__((__always_inline__, __nodebug__,
                                        __malloc__))
 _mm_malloc(size_t size, size_t align)
@@ -67,5 +68,6 @@ _mm_free(void *p)
 {
   free(p);
 }
+#endif
 
 #endif /* __MM_MALLOC_H */
diff --git a/lib/Headers/stddef.h b/lib/Headers/stddef.h
index 7cc0bc1..9e87ee89 100644
--- a/lib/Headers/stddef.h
+++ b/lib/Headers/stddef.h
@@ -26,7 +26,10 @@
 #ifndef __STDDEF_H
 #define __STDDEF_H
 
+#ifndef _PTRDIFF_T
+#define _PTRDIFF_T
 typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t;
+#endif
 #ifndef _SIZE_T
 #define _SIZE_T
 typedef __typeof__(sizeof(int)) size_t;
@@ -51,7 +54,7 @@ typedef __WCHAR_TYPE__ wchar_t;
 #endif /* __STDDEF_H */
 
 /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
-__WINT_TYPE__ directly; accomodate both by requiring __need_wint_t */
+__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
 #if defined(__need_wint_t)
 #if !defined(_WINT_T)
 #define _WINT_T
diff --git a/lib/Headers/stdint.h b/lib/Headers/stdint.h
index 9498ed5..6f1a876 100644
--- a/lib/Headers/stdint.h
+++ b/lib/Headers/stdint.h
@@ -46,7 +46,7 @@
  * and 64-bit widths regardless of whether there are corresponding exact-width
  * types. 
  *
- * To accomodate targets that are missing types that are exactly 8, 16, 32, or
+ * To accommodate targets that are missing types that are exactly 8, 16, 32, or
  * 64 bits wide, this implementation takes an approach of cascading
  * redefintions, redefining __int_leastN_t to successively smaller exact-width
  * types. It is therefore important that the types are defined in order of
@@ -58,7 +58,7 @@
  *
  * In violation of the standard, some targets do not implement a type that is
  * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).  
- * To accomodate these targets, a required minimum-width type is only
+ * To accommodate these targets, a required minimum-width type is only
  * defined if there exists an exact-width type of equal or greater width.
  */
 
@@ -609,11 +609,15 @@ typedef __UINTMAX_TYPE__ uintmax_t;
 # define UINT_FAST8_MAX  __UINT_LEAST8_MAX
 #endif /* __INT_LEAST8_MIN */
 
+/* Some utility macros */
+#define  __INTN_MIN(n)  __stdint_join3( INT, n, _MIN)
+#define  __INTN_MAX(n)  __stdint_join3( INT, n, _MAX)
+#define __UINTN_MAX(n)  __stdint_join3(UINT, n, _MAX)
+#define  __INTN_C(n, v) __stdint_join3( INT, n, _C(v))
+#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))
+
 /* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */
 /* C99 7.18.3 Limits of other integer types. */
-#define  __INTN_MIN(n) __stdint_join3( INT, n, _MIN)
-#define  __INTN_MAX(n) __stdint_join3( INT, n, _MAX)
-#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)
 
 #define  INTPTR_MIN  __INTN_MIN(__INTPTR_WIDTH__)
 #define  INTPTR_MAX  __INTN_MAX(__INTPTR_WIDTH__)
@@ -630,23 +634,26 @@ typedef __UINTMAX_TYPE__ uintmax_t;
 /* C99 7.18.3 Limits of other integer types. */
 #define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)
 #define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)
-#define WINT_MIN       __INTN_MIN(__WINT_WIDTH__)
-#define WINT_MAX       __INTN_MAX(__WINT_WIDTH__)
+#ifdef __WINT_UNSIGNED__
+# define WINT_MIN       __UINTN_C(__WINT_WIDTH__, 0)
+# define WINT_MAX       __UINTN_MAX(__WINT_WIDTH__)
+#else
+# define WINT_MIN       __INTN_MIN(__WINT_WIDTH__)
+# define WINT_MAX       __INTN_MAX(__WINT_WIDTH__)
+#endif
 
-/* FIXME: if we ever support a target with unsigned wchar_t, this should be
- * 0 .. Max.
- */
 #ifndef WCHAR_MAX
-#define WCHAR_MAX __INTN_MAX(__WCHAR_WIDTH__)
+# define WCHAR_MAX __WCHAR_MAX__
 #endif
 #ifndef WCHAR_MIN
-#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
+# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
+#  define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
+# else
+#  define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
+# endif
 #endif
 
 /* 7.18.4.2 Macros for greatest-width integer constants. */
-#define  __INTN_C(n, v) __stdint_join3( INT, n, _C(v))
-#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))
-
 #define INTMAX_C(v)   __INTN_C(__INTMAX_WIDTH__, v)
 #define UINTMAX_C(v) __UINTN_C(__INTMAX_WIDTH__, v)
 
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 42dd3e8..00760ed 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -539,7 +539,7 @@ _mm_load_ps(const float *p)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_loadu_ps(const float *p)
 {
-  return __builtin_ia32_loadups(p);
+  return (__m128){ p[0], p[1], p[2], p[3] };
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))