Diffstat (limited to 'contrib/llvm/tools/clang/lib/Headers')
22 files changed, 8190 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/Headers/CMakeLists.txt b/contrib/llvm/tools/clang/lib/Headers/CMakeLists.txt new file mode 100644 index 0000000..047fdb3 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/CMakeLists.txt @@ -0,0 +1,40 @@ +set(files + altivec.h + arm_neon.h + emmintrin.h + float.h + iso646.h + limits.h + mm_malloc.h + mmintrin.h + pmmintrin.h + stdarg.h + stdbool.h + stddef.h + stdint.h + tgmath.h + tmmintrin.h + xmmintrin.h) + +if (MSVC_IDE OR XCODE) + set(output_dir ${LLVM_BINARY_DIR}/bin/lib/clang/${CLANG_VERSION}/include) +else () + set(output_dir ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/include) +endif () + + +foreach( f ${files} ) + set( src ${CMAKE_CURRENT_SOURCE_DIR}/${f} ) + set( dst ${output_dir}/${f} ) + add_custom_command(OUTPUT ${dst} + DEPENDS ${src} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} + COMMENT "Copying clang's ${f}...") +endforeach( f ) + +add_custom_target(clang-headers ALL + DEPENDS ${files}) + +install(FILES ${files} + PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ + DESTINATION lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}/include) diff --git a/contrib/llvm/tools/clang/lib/Headers/Makefile b/contrib/llvm/tools/clang/lib/Headers/Makefile new file mode 100644 index 0000000..cb36e84 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/Makefile @@ -0,0 +1,40 @@ +##===- clang/lib/Headers/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +include $(LEVEL)/Makefile.common + +CLANG_VERSION := $(shell cat $(PROJ_SRC_DIR)/../../VER) + +HeaderDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/lib/clang/$(CLANG_VERSION)/include + +HEADERS := $(notdir $(wildcard $(PROJ_SRC_DIR)/*.h)) + +OBJHEADERS := $(addprefix $(HeaderDir)/, $(HEADERS)) + + +$(OBJHEADERS): $(HeaderDir)/%.h: $(PROJ_SRC_DIR)/%.h $(HeaderDir)/.dir + $(Verb) cp $< $@ + $(Echo) Copying $(notdir $<) to build dir + +# Hook into the standard Makefile rules. +all-local:: $(OBJHEADERS) + +PROJ_headers := $(DESTDIR)$(PROJ_prefix)/lib/clang/$(CLANG_VERSION)/include + +INSTHEADERS := $(addprefix $(PROJ_headers)/, $(HEADERS)) + +$(PROJ_headers): + $(Verb) $(MKDIR) $@ + +$(INSTHEADERS): $(PROJ_headers)/%.h: $(HeaderDir)/%.h | $(PROJ_headers) + $(Verb) $(DataInstall) $< $(PROJ_headers) + $(Echo) Installing compiler include file: $(notdir $<) + +install-local:: $(INSTHEADERS) diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h new file mode 100644 index 0000000..1cd0db8 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h @@ -0,0 +1,1483 @@ +/*===---- altivec.h - PowerPC AltiVec intrinsics ----------------------------===*\ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __ALTIVEC_H +#define __ALTIVEC_H + +#ifndef __ALTIVEC__ +#error "AltiVec support not enabled" +#endif + +/* constants for mapping CR6 bits to predicate result. */ + +#define __CR6_EQ 0 +#define __CR6_EQ_REV 1 +#define __CR6_LT 2 +#define __CR6_LT_REV 3 + +#define _ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) + +/* vec_abs */ + +#define __builtin_vec_abs vec_abs +#define __builtin_altivec_abs_v16qi vec_abs +#define __builtin_altivec_abs_v8hi vec_abs +#define __builtin_altivec_abs_v4si vec_abs + +static vector signed char _ATTRS_o_ai +vec_abs(vector signed char a) +{ + return __builtin_altivec_vmaxsb(a, -a); +} + +static vector signed short _ATTRS_o_ai +vec_abs(vector signed short a) +{ + return __builtin_altivec_vmaxsh(a, -a); +} + +static vector signed int _ATTRS_o_ai +vec_abs(vector signed int a) +{ + return __builtin_altivec_vmaxsw(a, -a); +} + +static vector float _ATTRS_o_ai +vec_abs(vector float a) +{ + vector unsigned int res = (vector unsigned int)a & + (vector unsigned int)(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF); + return (vector float)res; +} + +/* vec_abss */ + +#define __builtin_vec_abss vec_abss +#define __builtin_altivec_abss_v16qi vec_abss +#define __builtin_altivec_abss_v8hi vec_abss +#define __builtin_altivec_abss_v4si vec_abss + +static vector signed char _ATTRS_o_ai +vec_abss(vector signed char a) +{ + return __builtin_altivec_vmaxsb(a, __builtin_altivec_vsubsbs( + (vector signed char)(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), a)); +} + +static vector signed short _ATTRS_o_ai +vec_abss(vector signed short a) +{ + return __builtin_altivec_vmaxsh(a, __builtin_altivec_vsubshs( + (vector signed short)(0, 0, 0, 0, 0, 0, 0, 0), a)); +} + +static vector signed int _ATTRS_o_ai +vec_abss(vector signed int a) +{ + return __builtin_altivec_vmaxsw(a, __builtin_altivec_vsubsws( + (vector signed int)(0, 0, 0, 0), a)); +} + +/* vec_add */ + +#define __builtin_altivec_vaddubm vec_add +#define __builtin_altivec_vadduhm vec_add +#define __builtin_altivec_vadduwm vec_add +#define __builtin_altivec_vaddfp vec_add +#define __builtin_vec_vaddubm vec_add +#define __builtin_vec_vadduhm vec_add +#define __builtin_vec_vadduwm vec_add +#define __builtin_vec_vaddfp vec_add +#define vec_vaddubm vec_add +#define vec_vadduhm vec_add +#define vec_vadduwm vec_add +#define vec_vaddfp vec_add + +static vector signed char _ATTRS_o_ai +vec_add(vector signed char a, vector signed char b) +{ + return a + b; +} + +static vector unsigned char _ATTRS_o_ai +vec_add(vector unsigned char a, vector unsigned char b) +{ + return a + b; +} + +static vector short _ATTRS_o_ai +vec_add(vector short a, vector short b) +{ + return a + b; +} + +static vector unsigned short _ATTRS_o_ai +vec_add(vector unsigned short a, vector unsigned short b) +{ + return a + b; +} + +static vector int _ATTRS_o_ai +vec_add(vector int a, vector int b) +{ + return a + b; +} + +static vector unsigned int _ATTRS_o_ai +vec_add(vector 
unsigned int a, vector unsigned int b) +{ + return a + b; +} + +static vector float _ATTRS_o_ai +vec_add(vector float a, vector float b) +{ + return a + b; +} + +/* vec_addc */ + +#define __builtin_vec_addc __builtin_altivec_vaddcuw +#define vec_vaddcuw __builtin_altivec_vaddcuw +#define vec_addc __builtin_altivec_vaddcuw + +/* vec_adds */ + +#define __builtin_vec_vaddsbs __builtin_altivec_vaddsbs +#define __builtin_vec_vaddubs __builtin_altivec_vaddubs +#define __builtin_vec_vaddshs __builtin_altivec_vaddshs +#define __builtin_vec_vadduhs __builtin_altivec_vadduhs +#define __builtin_vec_vaddsws __builtin_altivec_vaddsws +#define __builtin_vec_vadduws __builtin_altivec_vadduws +#define vec_vaddsbs __builtin_altivec_vaddsbs +#define vec_vaddubs __builtin_altivec_vaddubs +#define vec_vaddshs __builtin_altivec_vaddshs +#define vec_vadduhs __builtin_altivec_vadduhs +#define vec_vaddsws __builtin_altivec_vaddsws +#define vec_vadduws __builtin_altivec_vadduws + +static vector signed char _ATTRS_o_ai +vec_adds(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vaddsbs(a, b); +} + +static vector unsigned char _ATTRS_o_ai +vec_adds(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vaddubs(a, b); +} + +static vector short _ATTRS_o_ai +vec_adds(vector short a, vector short b) +{ + return __builtin_altivec_vaddshs(a, b); +} + +static vector unsigned short _ATTRS_o_ai +vec_adds(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vadduhs(a, b); +} + +static vector int _ATTRS_o_ai +vec_adds(vector int a, vector int b) +{ + return __builtin_altivec_vaddsws(a, b); +} + +static vector unsigned int _ATTRS_o_ai +vec_adds(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vadduws(a, b); +} + +/* vec_sub */ + +#define __builtin_altivec_vsububm vec_sub +#define __builtin_altivec_vsubuhm vec_sub +#define __builtin_altivec_vsubuwm vec_sub +#define __builtin_altivec_vsubfp vec_sub +#define __builtin_vec_vsububm vec_sub +#define __builtin_vec_vsubuhm vec_sub +#define __builtin_vec_vsubuwm vec_sub +#define __builtin_vec_vsubfp vec_sub +#define vec_vsububm vec_sub +#define vec_vsubuhm vec_sub +#define vec_vsubuwm vec_sub +#define vec_vsubfp vec_sub + +static vector signed char _ATTRS_o_ai +vec_sub(vector signed char a, vector signed char b) +{ + return a - b; +} + +static vector unsigned char _ATTRS_o_ai +vec_sub(vector unsigned char a, vector unsigned char b) +{ + return a - b; +} + +static vector short _ATTRS_o_ai +vec_sub(vector short a, vector short b) +{ + return a - b; +} + +static vector unsigned short _ATTRS_o_ai +vec_sub(vector unsigned short a, vector unsigned short b) +{ + return a - b; +} + +static vector int _ATTRS_o_ai +vec_sub(vector int a, vector int b) +{ + return a - b; +} + +static vector unsigned int _ATTRS_o_ai +vec_sub(vector unsigned int a, vector unsigned int b) +{ + return a - b; +} + +static vector float _ATTRS_o_ai +vec_sub(vector float a, vector float b) +{ + return a - b; +} + +/* vec_subs */ + +#define __builtin_vec_vsubsbs __builtin_altivec_vsubsbs +#define __builtin_vec_vsububs __builtin_altivec_vsububs +#define __builtin_vec_vsubshs __builtin_altivec_vsubshs +#define __builtin_vec_vsubuhs __builtin_altivec_vsubuhs +#define __builtin_vec_vsubsws __builtin_altivec_vsubsws +#define __builtin_vec_vsubuws __builtin_altivec_vsubuws +#define vec_vsubsbs __builtin_altivec_vsubsbs +#define vec_vsububs __builtin_altivec_vsububs +#define vec_vsubshs __builtin_altivec_vsubshs +#define 
vec_vsubuhs __builtin_altivec_vsubuhs +#define vec_vsubsws __builtin_altivec_vsubsws +#define vec_vsubuws __builtin_altivec_vsubuws + +static vector signed char _ATTRS_o_ai +vec_subs(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vsubsbs(a, b); +} + +static vector unsigned char _ATTRS_o_ai +vec_subs(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vsububs(a, b); +} + +static vector short _ATTRS_o_ai +vec_subs(vector short a, vector short b) +{ + return __builtin_altivec_vsubshs(a, b); +} + +static vector unsigned short _ATTRS_o_ai +vec_subs(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vsubuhs(a, b); +} + +static vector int _ATTRS_o_ai +vec_subs(vector int a, vector int b) +{ + return __builtin_altivec_vsubsws(a, b); +} + +static vector unsigned int _ATTRS_o_ai +vec_subs(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vsubuws(a, b); +} + +/* vec_avg */ + +#define __builtin_vec_vavgsb __builtin_altivec_vavgsb +#define __builtin_vec_vavgub __builtin_altivec_vavgub +#define __builtin_vec_vavgsh __builtin_altivec_vavgsh +#define __builtin_vec_vavguh __builtin_altivec_vavguh +#define __builtin_vec_vavgsw __builtin_altivec_vavgsw +#define __builtin_vec_vavguw __builtin_altivec_vavguw +#define vec_vavgsb __builtin_altivec_vavgsb +#define vec_vavgub __builtin_altivec_vavgub +#define vec_vavgsh __builtin_altivec_vavgsh +#define vec_vavguh __builtin_altivec_vavguh +#define vec_vavgsw __builtin_altivec_vavgsw +#define vec_vavguw __builtin_altivec_vavguw + +static vector signed char _ATTRS_o_ai +vec_avg(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vavgsb(a, b); +} + +static vector unsigned char _ATTRS_o_ai +vec_avg(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vavgub(a, b); +} + +static vector short _ATTRS_o_ai +vec_avg(vector short a, vector short b) +{ + return __builtin_altivec_vavgsh(a, b); +} + +static vector unsigned short _ATTRS_o_ai +vec_avg(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vavguh(a, b); +} + +static vector int _ATTRS_o_ai +vec_avg(vector int a, vector int b) +{ + return __builtin_altivec_vavgsw(a, b); +} + +static vector unsigned int _ATTRS_o_ai +vec_avg(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vavguw(a, b); +} + +/* vec_st */ + +#define __builtin_vec_st vec_st +#define vec_stvx vec_st + +static void _ATTRS_o_ai +vec_st(vector signed char a, int b, vector signed char *c) +{ + __builtin_altivec_stvx((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_st(vector unsigned char a, int b, vector unsigned char *c) +{ + __builtin_altivec_stvx((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_st(vector short a, int b, vector short *c) +{ + __builtin_altivec_stvx((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_st(vector unsigned short a, int b, vector unsigned short *c) +{ + __builtin_altivec_stvx((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_st(vector int a, int b, vector int *c) +{ + __builtin_altivec_stvx(a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_st(vector unsigned int a, int b, vector unsigned int *c) +{ + __builtin_altivec_stvx((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_st(vector float a, int b, vector float *c) +{ + __builtin_altivec_stvx((vector int)a, b, (void *)c); +} + +/* vec_stl */ + +#define __builtin_vec_stl vec_stl +#define vec_stvxl 
vec_stl + +static void _ATTRS_o_ai +vec_stl(vector signed char a, int b, vector signed char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_stl(vector unsigned char a, int b, vector unsigned char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_stl(vector short a, int b, vector short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_stl(vector unsigned short a, int b, vector unsigned short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_stl(vector int a, int b, vector int *c) +{ + __builtin_altivec_stvxl(a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_stl(vector unsigned int a, int b, vector unsigned int *c) +{ + __builtin_altivec_stvxl((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_stl(vector float a, int b, vector float *c) +{ + __builtin_altivec_stvxl((vector int)a, b, (void *)c); +} + +/* vec_ste */ + +#define __builtin_vec_stvebx __builtin_altivec_stvebx +#define __builtin_vec_stvehx __builtin_altivec_stvehx +#define __builtin_vec_stvewx __builtin_altivec_stvewx +#define vec_stvebx __builtin_altivec_stvebx +#define vec_stvehx __builtin_altivec_stvehx +#define vec_stvewx __builtin_altivec_stvewx + +static void _ATTRS_o_ai +vec_ste(vector signed char a, int b, vector signed char *c) +{ + __builtin_altivec_stvebx((vector char)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_ste(vector unsigned char a, int b, vector unsigned char *c) +{ + __builtin_altivec_stvebx((vector char)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_ste(vector short a, int b, vector short *c) +{ + __builtin_altivec_stvehx(a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_ste(vector unsigned short a, int b, vector unsigned short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_ste(vector int a, int b, vector int *c) +{ + __builtin_altivec_stvewx(a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_ste(vector unsigned int a, int b, vector unsigned int *c) +{ + __builtin_altivec_stvewx((vector int)a, b, (void *)c); +} + +static void _ATTRS_o_ai +vec_ste(vector float a, int b, vector float *c) +{ + __builtin_altivec_stvewx((vector int)a, b, (void *)c); +} + +/* vec_cmpb */ + +#define vec_cmpb __builtin_altivec_vcmpbfp +#define vec_vcmpbfp __builtin_altivec_vcmpbfp +#define __builtin_vec_cmpb __builtin_altivec_vcmpbfp + +/* vec_cmpeq */ + +#define __builtin_vec_cmpeq vec_cmpeq + +static vector /*bool*/ char _ATTRS_o_ai +vec_cmpeq(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb((vector char)a, (vector char)b); +} + +static vector /*bool*/ char _ATTRS_o_ai +vec_cmpeq(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb((vector char)a, (vector char)b); +} + +static vector /*bool*/ short _ATTRS_o_ai +vec_cmpeq(vector short a, vector short b) +{ + return __builtin_altivec_vcmpequh(a, b); +} + +static vector /*bool*/ short _ATTRS_o_ai +vec_cmpeq(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh((vector short)a, (vector short)b); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmpeq(vector int a, vector int b) +{ + return __builtin_altivec_vcmpequw(a, b); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmpeq(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw((vector int)a, (vector int)b); +} + +static 
vector /*bool*/ int _ATTRS_o_ai +vec_cmpeq(vector float a, vector float b) +{ + return __builtin_altivec_vcmpeqfp(a, b); +} + +/* vec_cmpge */ + +#define vec_cmpge __builtin_altivec_vcmpgefp +#define vec_vcmpgefp __builtin_altivec_vcmpgefp +#define __builtin_vec_cmpge __builtin_altivec_vcmpgefp + +/* vec_cmpgt */ + +#define vec_vcmpgtsb __builtin_altivec_vcmpgtsb +#define vec_vcmpgtub __builtin_altivec_vcmpgtub +#define vec_vcmpgtsh __builtin_altivec_vcmpgtsh +#define vec_vcmpgtuh __builtin_altivec_vcmpgtuh +#define vec_vcmpgtsw __builtin_altivec_vcmpgtsw +#define vec_vcmpgtuw __builtin_altivec_vcmpgtuw +#define vec_vcmpgtfp __builtin_altivec_vcmpgtfp +#define __builtin_vec_vcmpgtsb __builtin_altivec_vcmpgtsb +#define __builtin_vec_vcmpgtub __builtin_altivec_vcmpgtub +#define __builtin_vec_vcmpgtsh __builtin_altivec_vcmpgtsh +#define __builtin_vec_vcmpgtuh __builtin_altivec_vcmpgtuh +#define __builtin_vec_vcmpgtsw __builtin_altivec_vcmpgtsw +#define __builtin_vec_vcmpgtuw __builtin_altivec_vcmpgtuw +#define __builtin_vec_vcmpgtfp __builtin_altivec_vcmpgtfp + +static vector /*bool*/ char _ATTRS_o_ai +vec_cmpgt(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb(a, b); +} + +static vector /*bool*/ char _ATTRS_o_ai +vec_cmpgt(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub(a, b); +} + +static vector /*bool*/ short _ATTRS_o_ai +vec_cmpgt(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh(a, b); +} + +static vector /*bool*/ short _ATTRS_o_ai +vec_cmpgt(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh(a, b); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmpgt(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw(a, b); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmpgt(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw(a, b); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmpgt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp(a, b); +} + +/* vec_cmple */ + +#define __builtin_vec_cmple vec_cmple + +static vector /*bool*/ int __attribute__((__always_inline__)) +vec_cmple(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgefp(b, a); +} + +/* vec_cmplt */ + +#define __builtin_vec_cmplt vec_cmplt + +static vector /*bool*/ char _ATTRS_o_ai +vec_cmplt(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb(b, a); +} + +static vector /*bool*/ char _ATTRS_o_ai +vec_cmplt(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub(b, a); +} + +static vector /*bool*/ short _ATTRS_o_ai +vec_cmplt(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh(b, a); +} + +static vector /*bool*/ short _ATTRS_o_ai +vec_cmplt(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh(b, a); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmplt(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw(b, a); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmplt(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw(b, a); +} + +static vector /*bool*/ int _ATTRS_o_ai +vec_cmplt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp(b, a); +} + +/* vec_max */ + +#define __builtin_vec_vmaxsb __builtin_altivec_vmaxsb +#define __builtin_vec_vmaxub __builtin_altivec_vmaxub +#define __builtin_vec_vmaxsh __builtin_altivec_vmaxsh +#define 
__builtin_vec_vmaxuh __builtin_altivec_vmaxuh +#define __builtin_vec_vmaxsw __builtin_altivec_vmaxsw +#define __builtin_vec_vmaxuw __builtin_altivec_vmaxuw +#define __builtin_vec_vmaxfp __builtin_altivec_vmaxfp +#define vec_vmaxsb __builtin_altivec_vmaxsb +#define vec_vmaxub __builtin_altivec_vmaxub +#define vec_vmaxsh __builtin_altivec_vmaxsh +#define vec_vmaxuh __builtin_altivec_vmaxuh +#define vec_vmaxsw __builtin_altivec_vmaxsw +#define vec_vmaxuw __builtin_altivec_vmaxuw +#define vec_vmaxfp __builtin_altivec_vmaxfp +#define __builtin_vec_max vec_max + +static vector signed char _ATTRS_o_ai +vec_max(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vmaxsb(a, b); +} + +static vector unsigned char _ATTRS_o_ai +vec_max(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vmaxub(a, b); +} + +static vector short _ATTRS_o_ai +vec_max(vector short a, vector short b) +{ + return __builtin_altivec_vmaxsh(a, b); +} + +static vector unsigned short _ATTRS_o_ai +vec_max(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vmaxuh(a, b); +} + +static vector int _ATTRS_o_ai +vec_max(vector int a, vector int b) +{ + return __builtin_altivec_vmaxsw(a, b); +} + +static vector unsigned int _ATTRS_o_ai +vec_max(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vmaxuw(a, b); +} + +static vector float _ATTRS_o_ai +vec_max(vector float a, vector float b) +{ + return __builtin_altivec_vmaxfp(a, b); +} + +/* vec_mfvscr */ + +#define __builtin_vec_mfvscr __builtin_altivec_mfvscr +#define vec_mfvscr __builtin_altivec_mfvscr + +/* vec_min */ + +#define __builtin_vec_vminsb __builtin_altivec_vminsb +#define __builtin_vec_vminub __builtin_altivec_vminub +#define __builtin_vec_vminsh __builtin_altivec_vminsh +#define __builtin_vec_vminuh __builtin_altivec_vminuh +#define __builtin_vec_vminsw __builtin_altivec_vminsw +#define __builtin_vec_vminuw __builtin_altivec_vminuw +#define __builtin_vec_vminfp __builtin_altivec_vminfp +#define vec_vminsb __builtin_altivec_vminsb +#define vec_vminub __builtin_altivec_vminub +#define vec_vminsh __builtin_altivec_vminsh +#define vec_vminuh __builtin_altivec_vminuh +#define vec_vminsw __builtin_altivec_vminsw +#define vec_vminuw __builtin_altivec_vminuw +#define vec_vminfp __builtin_altivec_vminfp +#define __builtin_vec_min vec_min + +static vector signed char _ATTRS_o_ai +vec_min(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vminsb(a, b); +} + +static vector unsigned char _ATTRS_o_ai +vec_min(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vminub(a, b); +} + +static vector short _ATTRS_o_ai +vec_min(vector short a, vector short b) +{ + return __builtin_altivec_vminsh(a, b); +} + +static vector unsigned short _ATTRS_o_ai +vec_min(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vminuh(a, b); +} + +static vector int _ATTRS_o_ai +vec_min(vector int a, vector int b) +{ + return __builtin_altivec_vminsw(a, b); +} + +static vector unsigned int _ATTRS_o_ai +vec_min(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vminuw(a, b); +} + +static vector float _ATTRS_o_ai +vec_min(vector float a, vector float b) +{ + return __builtin_altivec_vminfp(a, b); +} + +/* vec_mtvscr */ + +#define __builtin_vec_mtvscr __builtin_altivec_mtvscr +#define vec_mtvscr __builtin_altivec_mtvscr + +/* ------------------------------ predicates ------------------------------------ */ + +static int 
__attribute__((__always_inline__)) +__builtin_vec_vcmpeq_p(char CR6_param, vector float a, vector float b) +{ + return __builtin_altivec_vcmpeqfp_p(CR6_param, a, b); +} + +static int __attribute__((__always_inline__)) +__builtin_vec_vcmpge_p(char CR6_param, vector float a, vector float b) +{ + return __builtin_altivec_vcmpgefp_p(CR6_param, a, b); +} + +static int __attribute__((__always_inline__)) +__builtin_vec_vcmpgt_p(char CR6_param, vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(CR6_param, a, b); +} + +/* vec_all_eq */ + +static int _ATTRS_o_ai +vec_all_eq(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_all_eq(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_all_eq(vector short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_eq(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); +} + +static int _ATTRS_o_ai +vec_all_eq(vector int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_eq(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)a, (vector int)b); +} + +static int _ATTRS_o_ai +vec_all_eq(vector float a, vector float b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT, a, b); +} + +/* vec_all_ge */ + +static int _ATTRS_o_ai +vec_all_ge(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_ge(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_ge(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_ge(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_ge(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_ge(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_ge(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT, b, a); +} + +/* vec_all_gt */ + +static int _ATTRS_o_ai +vec_all_gt(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_gt(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_gt(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_gt(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_gt(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_gt(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, a, b); +} + +static int _ATTRS_o_ai +vec_all_gt(vector float a, vector float 
b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT, a, b); +} + +/* vec_all_in */ + +static int __attribute__((__always_inline__)) +vec_all_in(vector float a, vector float b) +{ + return __builtin_altivec_vcmpbfp_p(__CR6_EQ, a, b); +} + +/* vec_all_le */ + +static int _ATTRS_o_ai +vec_all_le(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_le(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_le(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_le(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_le(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_le(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_le(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, a, b); +} + +/* vec_all_lt */ + +static int _ATTRS_o_ai +vec_all_lt(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_lt(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_lt(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_lt(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_lt(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_lt(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, b, a); +} + +static int _ATTRS_o_ai +vec_all_lt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT, b, a); +} + +/* vec_all_nan */ + +static int __attribute__((__always_inline__)) +vec_all_nan(vector float a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, a, a); +} + +/* vec_all_ne */ + +static int _ATTRS_o_ai +vec_all_ne(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_all_ne(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_all_ne(vector short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_ne(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)a, (vector short)b); +} + +static int _ATTRS_o_ai +vec_all_ne(vector int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, a, b); +} + +static int _ATTRS_o_ai +vec_all_ne(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)a, (vector int)b); +} + +static int _ATTRS_o_ai +vec_all_ne(vector float a, vector float b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, a, b); +} + +/* vec_all_nge */ + +static int __attribute__((__always_inline__)) 
+vec_all_nge(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_EQ, a, b); +} + +/* vec_all_ngt */ + +static int __attribute__((__always_inline__)) +vec_all_ngt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, a, b); +} + +/* vec_all_nle */ + +static int __attribute__((__always_inline__)) +vec_all_nle(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_EQ, b, a); +} + +/* vec_all_nlt */ + +static int __attribute__((__always_inline__)) +vec_all_nlt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, b, a); +} + +/* vec_all_numeric */ + +static int __attribute__((__always_inline__)) +vec_all_numeric(vector float a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT, a, a); +} + +/* vec_any_eq */ + +static int _ATTRS_o_ai +vec_any_eq(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_any_eq(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_any_eq(vector short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_eq(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector short)b); +} + +static int _ATTRS_o_ai +vec_any_eq(vector int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_eq(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)a, (vector int)b); +} + +static int _ATTRS_o_ai +vec_any_eq(vector float a, vector float b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, a, b); +} + +/* vec_any_ge */ + +static int _ATTRS_o_ai +vec_any_ge(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_ge(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_ge(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_ge(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_ge(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_ge(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_ge(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, b, a); +} + +/* vec_any_gt */ + +static int _ATTRS_o_ai +vec_any_gt(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_gt(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_gt(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_gt(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, a, b); +} 
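(Aside: a minimal usage sketch for the vec_all_*/vec_any_* predicates defined in this section; a hypothetical caller, not part of the patch, assuming this header and a compiler invoked with -maltivec:)

#include <altivec.h>

/* Nonzero iff every element of v is strictly positive: vec_all_gt
   lowers to the record form of vcmpgtsw and reads the CR6 "all true"
   encoding (__CR6_LT). */
static int all_positive(vector signed int v)
{
    return vec_all_gt(v, (vector signed int)(0, 0, 0, 0));
}

/* Nonzero iff at least one element is negative: vec_any_lt reads the
   complementary CR6 encoding (__CR6_EQ_REV) from the same compare. */
static int any_negative(vector signed int v)
{
    return vec_any_lt(v, (vector signed int)(0, 0, 0, 0));
}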
+ +static int _ATTRS_o_ai +vec_any_gt(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_gt(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_gt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, a, b); +} + +/* vec_any_le */ + +static int _ATTRS_o_ai +vec_any_le(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_le(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_le(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_le(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_le(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_le(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_le(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, a, b); +} + +/* vec_any_lt */ + +static int _ATTRS_o_ai +vec_any_lt(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_lt(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_lt(vector short a, vector short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_lt(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_lt(vector int a, vector int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_lt(vector unsigned int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, b, a); +} + +static int _ATTRS_o_ai +vec_any_lt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, b, a); +} + +/* vec_any_nan */ + +static int __attribute__((__always_inline__)) +vec_any_nan(vector float a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, a, a); +} + +/* vec_any_ne */ + +static int _ATTRS_o_ai +vec_any_ne(vector signed char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_any_ne(vector unsigned char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int _ATTRS_o_ai +vec_any_ne(vector short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_ne(vector unsigned short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)a, (vector short)b); +} + +static int _ATTRS_o_ai +vec_any_ne(vector int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, a, b); +} + +static int _ATTRS_o_ai +vec_any_ne(vector unsigned int a, vector unsigned int b) +{ + return 
__builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)a, (vector int)b); +} + +static int _ATTRS_o_ai +vec_any_ne(vector float a, vector float b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, a, b); +} + +/* vec_any_nge */ + +static int __attribute__((__always_inline__)) +vec_any_nge(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, a, b); +} + +/* vec_any_ngt */ + +static int __attribute__((__always_inline__)) +vec_any_ngt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, a, b); +} + +/* vec_any_nle */ + +static int __attribute__((__always_inline__)) +vec_any_nle(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, b, a); +} + +/* vec_any_nlt */ + +static int __attribute__((__always_inline__)) +vec_any_nlt(vector float a, vector float b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, b, a); +} + +/* vec_any_numeric */ + +static int __attribute__((__always_inline__)) +vec_any_numeric(vector float a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, a, a); +} + +/* vec_any_out */ + +static int __attribute__((__always_inline__)) +vec_any_out(vector float a, vector float b) +{ + return __builtin_altivec_vcmpbfp_p(__CR6_EQ_REV, a, b); +} + +#undef _ATTRS_o_ai + +#endif /* __ALTIVEC_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/arm_neon.h b/contrib/llvm/tools/clang/lib/Headers/arm_neon.h new file mode 100644 index 0000000..4508a27 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/arm_neon.h @@ -0,0 +1,537 @@ +/*===---- arm_neon.h - NEON intrinsics --------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __ARM_NEON_H +#define __ARM_NEON_H + +#ifndef __ARM_NEON__ +#error "NEON support not enabled" +#endif + +// NEON document appears to be specified in terms of stdint types. +#include <stdint.h> + +// Define some NEON-specific scalar types for floats and polynomials. +typedef float float32_t; +typedef uint8_t poly8_t; + +// FIXME: probably need a 'poly' attribute or something for correct codegen to +// disambiguate from uint16_t. 
+typedef uint16_t poly16_t; + +typedef __attribute__(( __vector_size__(8) )) int8_t __neon_int8x8_t; +typedef __attribute__(( __vector_size__(16) )) int8_t __neon_int8x16_t; +typedef __attribute__(( __vector_size__(8) )) int16_t __neon_int16x4_t; +typedef __attribute__(( __vector_size__(16) )) int16_t __neon_int16x8_t; +typedef __attribute__(( __vector_size__(8) )) int32_t __neon_int32x2_t; +typedef __attribute__(( __vector_size__(16) )) int32_t __neon_int32x4_t; +typedef __attribute__(( __vector_size__(8) )) int64_t __neon_int64x1_t; +typedef __attribute__(( __vector_size__(16) )) int64_t __neon_int64x2_t; +typedef __attribute__(( __vector_size__(8) )) uint8_t __neon_uint8x8_t; +typedef __attribute__(( __vector_size__(16) )) uint8_t __neon_uint8x16_t; +typedef __attribute__(( __vector_size__(8) )) uint16_t __neon_uint16x4_t; +typedef __attribute__(( __vector_size__(16) )) uint16_t __neon_uint16x8_t; +typedef __attribute__(( __vector_size__(8) )) uint32_t __neon_uint32x2_t; +typedef __attribute__(( __vector_size__(16) )) uint32_t __neon_uint32x4_t; +typedef __attribute__(( __vector_size__(8) )) uint64_t __neon_uint64x1_t; +typedef __attribute__(( __vector_size__(16) )) uint64_t __neon_uint64x2_t; +typedef __attribute__(( __vector_size__(8) )) uint16_t __neon_float16x4_t; +typedef __attribute__(( __vector_size__(16) )) uint16_t __neon_float16x8_t; +typedef __attribute__(( __vector_size__(8) )) float32_t __neon_float32x2_t; +typedef __attribute__(( __vector_size__(16) )) float32_t __neon_float32x4_t; +typedef __attribute__(( __vector_size__(8) )) poly8_t __neon_poly8x8_t; +typedef __attribute__(( __vector_size__(16) )) poly8_t __neon_poly8x16_t; +typedef __attribute__(( __vector_size__(8) )) poly16_t __neon_poly16x4_t; +typedef __attribute__(( __vector_size__(16) )) poly16_t __neon_poly16x8_t; + +typedef struct __int8x8_t { + __neon_int8x8_t val; +} int8x8_t; + +typedef struct __int8x16_t { + __neon_int8x16_t val; +} int8x16_t; + +typedef struct __int16x4_t { + __neon_int16x4_t val; +} int16x4_t; + +typedef struct __int16x8_t { + __neon_int16x8_t val; +} int16x8_t; + +typedef struct __int32x2_t { + __neon_int32x2_t val; +} int32x2_t; + +typedef struct __int32x4_t { + __neon_int32x4_t val; +} int32x4_t; + +typedef struct __int64x1_t { + __neon_int64x1_t val; +} int64x1_t; + +typedef struct __int64x2_t { + __neon_int64x2_t val; +} int64x2_t; + +typedef struct __uint8x8_t { + __neon_uint8x8_t val; +} uint8x8_t; + +typedef struct __uint8x16_t { + __neon_uint8x16_t val; +} uint8x16_t; + +typedef struct __uint16x4_t { + __neon_uint16x4_t val; +} uint16x4_t; + +typedef struct __uint16x8_t { + __neon_uint16x8_t val; +} uint16x8_t; + +typedef struct __uint32x2_t { + __neon_uint32x2_t val; +} uint32x2_t; + +typedef struct __uint32x4_t { + __neon_uint32x4_t val; +} uint32x4_t; + +typedef struct __uint64x1_t { + __neon_uint64x1_t val; +} uint64x1_t; + +typedef struct __uint64x2_t { + __neon_uint64x2_t val; +} uint64x2_t; + +typedef struct __float16x4_t { + __neon_float16x4_t val; +} float16x4_t; + +typedef struct __float16x8_t { + __neon_float16x8_t val; +} float16x8_t; + +typedef struct __float32x2_t { + __neon_float32x2_t val; +} float32x2_t; + +typedef struct __float32x4_t { + __neon_float32x4_t val; +} float32x4_t; + +typedef struct __poly8x8_t { + __neon_poly8x8_t val; +} poly8x8_t; + +typedef struct __poly8x16_t { + __neon_poly8x16_t val; +} poly8x16_t; + +typedef struct __poly16x4_t { + __neon_poly16x4_t val; +} poly16x4_t; + +typedef struct __poly16x8_t { + __neon_poly16x8_t val; +} poly16x8_t; 
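(Aside: the typedefs above wrap each raw __neon_* vector in a one-member struct, so element-wise work goes through the .val field. A hedged sketch of a caller, not part of the patch, assuming this header and a NEON-enabled compile such as -mfpu=neon:)

#include <arm_neon.h>

/* Doubles each lane of the sum of two 2-lane int32 vectors. vadd_s32
   (defined further down in this header) wraps a.val + b.val back into
   the struct; .val exposes the underlying GCC-style vector for direct
   arithmetic. */
static int32x2_t double_sum(int32x2_t a, int32x2_t b)
{
    int32x2_t sum = vadd_s32(a, b);
    sum.val = sum.val + sum.val;
    return sum;
}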
+ +// FIXME: write tool to stamp out the structure-of-array types, possibly gen this whole file. + +// Intrinsics, per ARM document DUI0348B +#define __ai static __attribute__((__always_inline__)) + +#define INTTYPES_WIDE(op, builtin) \ + __ai int16x8_t op##_s8(int16x8_t a, int8x8_t b) { return (int16x8_t){ builtin(a.val, b.val) }; } \ + __ai int32x4_t op##_s16(int32x4_t a, int16x4_t b) { return (int32x4_t){ builtin(a.val, b.val) }; } \ + __ai int64x2_t op##_s32(int64x2_t a, int32x2_t b) { return (int64x2_t){ builtin(a.val, b.val) }; } \ + __ai uint16x8_t op##_u8(uint16x8_t a, uint8x8_t b) { return (uint16x8_t){ builtin(a.val, b.val) }; } \ + __ai uint32x4_t op##_u16(uint32x4_t a, uint16x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } \ + __ai uint64x2_t op##_u32(uint64x2_t a, uint32x2_t b) { return (uint64x2_t){ builtin(a.val, b.val) }; } + +#define INTTYPES_WIDENING(op, builtin) \ + __ai int16x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int16x8_t){ builtin(a.val, b.val) }; } \ + __ai int32x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int32x4_t){ builtin(a.val, b.val) }; } \ + __ai int64x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int64x2_t){ builtin(a.val, b.val) }; } \ + __ai uint16x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint16x8_t){ builtin(a.val, b.val) }; } \ + __ai uint32x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } \ + __ai uint64x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint64x2_t){ builtin(a.val, b.val) }; } + +#define INTTYPES_WIDENING_MUL(op, builtin) \ + __ai int16x8_t op##_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return (int16x8_t){ builtin(a.val, b.val, c.val) }; } \ + __ai int32x4_t op##_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return (int32x4_t){ builtin(a.val, b.val, c.val) }; } \ + __ai int64x2_t op##_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return (int64x2_t){ builtin(a.val, b.val, c.val) }; } \ + __ai uint16x8_t op##_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return (uint16x8_t){ builtin(a.val, b.val, c.val) }; } \ + __ai uint32x4_t op##_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return (uint32x4_t){ builtin(a.val, b.val, c.val) }; } \ + __ai uint64x2_t op##_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return (uint64x2_t){ builtin(a.val, b.val, c.val) }; } + +#define INTTYPES_NARROWING(op, builtin) \ + __ai int8x8_t op##_s16(int16x8_t a, int16x8_t b) { return (int8x8_t){ builtin(a.val, b.val) }; } \ + __ai int16x4_t op##_s32(int32x4_t a, int32x4_t b) { return (int16x4_t){ builtin(a.val, b.val) }; } \ + __ai int32x2_t op##_s64(int64x2_t a, int64x2_t b) { return (int32x2_t){ builtin(a.val, b.val) }; } \ + __ai uint8x8_t op##_u16(uint16x8_t a, uint16x8_t b) { return (uint8x8_t){ builtin(a.val, b.val) }; } \ + __ai uint16x4_t op##_u32(uint32x4_t a, uint32x4_t b) { return (uint16x4_t){ builtin(a.val, b.val) }; } \ + __ai uint32x2_t op##_u64(uint64x2_t a, uint64x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } + +#define INTTYPES_ADD_32(op, builtin) \ + __ai int8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){ builtin(a.val, b.val) }; } \ + __ai int16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){ builtin(a.val, b.val) }; } \ + __ai int32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){ builtin(a.val, b.val) }; } \ + __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){ builtin(a.val, b.val) }; } \ + __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){ builtin(a.val, b.val) }; } \ + __ai 
uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } \ + __ai int8x16_t op##q_s8(int8x16_t a, int8x16_t b) { return (int8x16_t){ builtin(a.val, b.val) }; } \ + __ai int16x8_t op##q_s16(int16x8_t a, int16x8_t b) { return (int16x8_t){ builtin(a.val, b.val) }; } \ + __ai int32x4_t op##q_s32(int32x4_t a, int32x4_t b) { return (int32x4_t){ builtin(a.val, b.val) }; } \ + __ai uint8x16_t op##q_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){ builtin(a.val, b.val) }; } \ + __ai uint16x8_t op##q_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){ builtin(a.val, b.val) }; } \ + __ai uint32x4_t op##q_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } + +#define INTTYPES_ADD_64(op, builtin) \ + __ai int64x1_t op##_s64(int64x1_t a, int64x1_t b) { return (int64x1_t){ builtin(a.val, b.val) }; } \ + __ai uint64x1_t op##_u64(uint64x1_t a, uint64x1_t b) { return (uint64x1_t){ builtin(a.val, b.val) }; } \ + __ai int64x2_t op##q_s64(int64x2_t a, int64x2_t b) { return (int64x2_t){ builtin(a.val, b.val) }; } \ + __ai uint64x2_t op##q_u64(uint64x2_t a, uint64x2_t b) { return (uint64x2_t){ builtin(a.val, b.val) }; } + +#define FLOATTYPES_CMP(op, builtin) \ + __ai uint32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } \ + __ai uint32x4_t op##q_f32(float32x4_t a, float32x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } + +#define INT_FLOAT_CMP_OP(op, cc) \ + __ai uint8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (uint8x8_t){(__neon_uint8x8_t)(a.val cc b.val)}; } \ + __ai uint16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (uint16x4_t){(__neon_uint16x4_t)(a.val cc b.val)}; } \ + __ai uint32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (uint32x2_t){(__neon_uint32x2_t)(a.val cc b.val)}; } \ + __ai uint32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (uint32x2_t){(__neon_uint32x2_t)(a.val cc b.val)}; } \ + __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){a.val cc b.val}; } \ + __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){a.val cc b.val}; } \ + __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){a.val cc b.val}; } \ + __ai uint8x16_t op##q_s8(int8x16_t a, int8x16_t b) { return (uint8x16_t){(__neon_uint8x16_t)(a.val cc b.val)}; } \ + __ai uint16x8_t op##q_s16(int16x8_t a, int16x8_t b) { return (uint16x8_t){(__neon_uint16x8_t)(a.val cc b.val)}; } \ + __ai uint32x4_t op##q_s32(int32x4_t a, int32x4_t b) { return (uint32x4_t){(__neon_uint32x4_t)(a.val cc b.val)}; } \ + __ai uint32x4_t op##q_f32(float32x4_t a, float32x4_t b) { return (uint32x4_t){(__neon_uint32x4_t)(a.val cc b.val)}; } \ + __ai uint8x16_t op##q_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){a.val cc b.val}; } \ + __ai uint16x8_t op##q_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){a.val cc b.val}; } \ + __ai uint32x4_t op##q_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){a.val cc b.val}; } + +#define INT_UNARY(op, builtin) \ + __ai int8x8_t op##_s8(int8x8_t a) { return (int8x8_t){ builtin(a.val) }; } \ + __ai int16x4_t op##_s16(int16x4_t a) { return (int16x4_t){ builtin(a.val) }; } \ + __ai int32x2_t op##_s32(int32x2_t a) { return (int32x2_t){ builtin(a.val) }; } \ + __ai int8x16_t op##q_s8(int8x16_t a) { return (int8x16_t){ builtin(a.val) }; } \ + __ai int16x8_t op##q_s16(int16x8_t a) { return (int16x8_t){ builtin(a.val) }; } \ + __ai int32x4_t op##q_s32(int32x4_t a) { return (int32x4_t){ builtin(a.val) }; } + +#define 
FP_UNARY(op, builtin) \ + __ai float32x2_t op##_f32(float32x2_t a) { return (float32x2_t){ builtin(a.val) }; } \ + __ai float32x4_t op##q_f32(float32x4_t a) { return (float32x4_t){ builtin(a.val) }; } + +#define FP_BINARY(op, builtin) \ + __ai float32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (float32x2_t){ builtin(a.val, b.val) }; } \ + __ai float32x4_t op##q_f32(float32x4_t a, float32x4_t b) { return (float32x4_t){ builtin(a.val, b.val) }; } + +#define INT_FP_PAIRWISE_ADD(op, builtin) \ + __ai int8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){ builtin(a.val, b.val) }; } \ + __ai int16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){ builtin(a.val, b.val) }; } \ + __ai int32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){ builtin(a.val, b.val) }; } \ + __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){ builtin(a.val, b.val) }; } \ + __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){ builtin(a.val, b.val) }; } \ + __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } \ + __ai float32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (float32x2_t){ builtin(a.val, b.val) }; } + +#define INT_LOGICAL_OP(op, lop) \ + __ai int8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){ a.val lop b.val }; } \ + __ai int16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){ a.val lop b.val }; } \ + __ai int32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){ a.val lop b.val }; } \ + __ai int64x1_t op##_s64(int64x1_t a, int64x1_t b) { return (int64x1_t){ a.val lop b.val }; } \ + __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){ a.val lop b.val }; } \ + __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){ a.val lop b.val }; } \ + __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){ a.val lop b.val }; } \ + __ai uint64x1_t op##_u64(uint64x1_t a, uint64x1_t b) { return (uint64x1_t){ a.val lop b.val }; } \ + __ai int8x16_t op##q_s8(int8x16_t a, int8x16_t b) { return (int8x16_t){ a.val lop b.val }; } \ + __ai int16x8_t op##q_s16(int16x8_t a, int16x8_t b) { return (int16x8_t){ a.val lop b.val }; } \ + __ai int32x4_t op##q_s32(int32x4_t a, int32x4_t b) { return (int32x4_t){ a.val lop b.val }; } \ + __ai int64x2_t op##q_s64(int64x2_t a, int64x2_t b) { return (int64x2_t){ a.val lop b.val }; } \ + __ai uint8x16_t op##q_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){ a.val lop b.val }; } \ + __ai uint16x8_t op##q_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){ a.val lop b.val }; } \ + __ai uint32x4_t op##q_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){ a.val lop b.val }; } \ + __ai uint64x2_t op##q_u64(uint64x2_t a, uint64x2_t b) { return (uint64x2_t){ a.val lop b.val }; } + +// vector add +__ai int8x8_t vadd_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){a.val + b.val}; } +__ai int16x4_t vadd_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){a.val + b.val}; } +__ai int32x2_t vadd_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){a.val + b.val}; } +__ai int64x1_t vadd_s64(int64x1_t a, int64x1_t b) { return (int64x1_t){a.val + b.val}; } +__ai float32x2_t vadd_f32(float32x2_t a, float32x2_t b) { return (float32x2_t){a.val + b.val}; } +__ai uint8x8_t vadd_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){a.val + b.val}; } +__ai uint16x4_t vadd_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){a.val + b.val}; } +__ai uint32x2_t vadd_u32(uint32x2_t a, uint32x2_t b) 
{ return (uint32x2_t){a.val + b.val}; } +__ai uint64x1_t vadd_u64(uint64x1_t a, uint64x1_t b) { return (uint64x1_t){a.val + b.val}; } +__ai int8x16_t vaddq_s8(int8x16_t a, int8x16_t b) { return (int8x16_t){a.val + b.val}; } +__ai int16x8_t vaddq_s16(int16x8_t a, int16x8_t b) { return (int16x8_t){a.val + b.val}; } +__ai int32x4_t vaddq_s32(int32x4_t a, int32x4_t b) { return (int32x4_t){a.val + b.val}; } +__ai int64x2_t vaddq_s64(int64x2_t a, int64x2_t b) { return (int64x2_t){a.val + b.val}; } +__ai float32x4_t vaddq_f32(float32x4_t a, float32x4_t b) { return (float32x4_t){a.val + b.val}; } +__ai uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){a.val + b.val}; } +__ai uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){a.val + b.val}; } +__ai uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){a.val + b.val}; } +__ai uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b) { return (uint64x2_t){a.val + b.val}; } + +// vector long add +INTTYPES_WIDENING(vaddl, __builtin_neon_vaddl) + +// vector wide add +INTTYPES_WIDE(vaddw, __builtin_neon_vaddw) + +// halving add +// rounding halving add +INTTYPES_ADD_32(vhadd, __builtin_neon_vhadd) +INTTYPES_ADD_32(vrhadd, __builtin_neon_vrhadd) + +// saturating add +INTTYPES_ADD_32(vqadd, __builtin_neon_vqadd) +INTTYPES_ADD_64(vqadd, __builtin_neon_vqadd) + +// add high half +// rounding add high half +INTTYPES_NARROWING(vaddhn, __builtin_neon_vaddhn) +INTTYPES_NARROWING(vraddhn, __builtin_neon_vraddhn) + +// multiply +// mul-poly + +// multiply accumulate +// multiply subtract + +// multiply accumulate long +// multiply subtract long +INTTYPES_WIDENING_MUL(vmlal, __builtin_neon_vmlal) +INTTYPES_WIDENING_MUL(vmlsl, __builtin_neon_vmlsl) + +// saturating doubling multiply high +// saturating rounding doubling multiply high + +// saturating doubling multiply accumulate long +// saturating doubling multiply subtract long + +// long multiply +// long multiply-poly +INTTYPES_WIDENING(vmull, __builtin_neon_vmull) +__ai poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b) { return (poly16x8_t){ __builtin_neon_vmull(a.val, b.val) }; } + +// saturating doubling long multiply + +// subtract + +// long subtract +INTTYPES_WIDENING(vsubl, __builtin_neon_vsubl) + +// wide subtract +INTTYPES_WIDE(vsubw, __builtin_neon_vsubw) + +// saturating subtract +INTTYPES_ADD_32(vqsub, __builtin_neon_vqsub) +INTTYPES_ADD_64(vqsub, __builtin_neon_vqsub) + +// halving subtract +INTTYPES_ADD_32(vhsub, __builtin_neon_vhsub) + +// subtract high half +// rounding subtract high half +INTTYPES_NARROWING(vsubhn, __builtin_neon_vsubhn) +INTTYPES_NARROWING(vrsubhn, __builtin_neon_vrsubhn) + +// compare eq +// compare ge +// compare le +// compare gt +// compare lt +INT_FLOAT_CMP_OP(vceq, ==) +INT_FLOAT_CMP_OP(vcge, >=) +INT_FLOAT_CMP_OP(vcle, <=) +INT_FLOAT_CMP_OP(vcgt, >) +INT_FLOAT_CMP_OP(vclt, <) + +// compare eq-poly + +// compare abs ge +// compare abs le +// compare abs gt +// compare abs lt +FLOATTYPES_CMP(vcage, __builtin_neon_vcage) +FLOATTYPES_CMP(vcale, __builtin_neon_vcale) +FLOATTYPES_CMP(vcagt, __builtin_neon_vcagt) +FLOATTYPES_CMP(vcalt, __builtin_neon_vcalt) + +// test bits + +// abs diff +INTTYPES_ADD_32(vabd, __builtin_neon_vabd) +FP_BINARY(vabd, __builtin_neon_vabd) + +// abs diff long +INTTYPES_WIDENING(vabdl, __builtin_neon_vabdl) + +// abs diff accumulate +// abs diff accumulate long + +// max +// min +INTTYPES_ADD_32(vmax, __builtin_neon_vmax) +FP_BINARY(vmax, __builtin_neon_vmax) +INTTYPES_ADD_32(vmin, __builtin_neon_vmin)
+
+// pairwise add
+// pairwise max
+// pairwise min
+INT_FP_PAIRWISE_ADD(vpadd, __builtin_neon_vpadd)
+INT_FP_PAIRWISE_ADD(vpmax, __builtin_neon_vpmax)
+INT_FP_PAIRWISE_ADD(vpmin, __builtin_neon_vpmin)
+
+// long pairwise add
+// long pairwise add accumulate
+
+// recip
+// recip sqrt
+FP_BINARY(vrecps, __builtin_neon_vrecps)
+FP_BINARY(vrsqrts, __builtin_neon_vrsqrts)
+
+// shl by vec
+// saturating shl by vec
+// rounding shl by vec
+// saturating rounding shl by vec
+
+// shr by constant
+// shl by constant
+// rounding shr by constant
+// shr by constant and accumulate
+// rounding shr by constant and accumulate
+// saturating shl by constant
+// s->u saturating shl by constant
+// narrowing saturating shr by constant
+// s->u narrowing saturating shr by constant
+// s->u rounding narrowing saturating shr by constant
+// narrowing saturating shr by constant
+// rounding narrowing shr by constant
+// rounding narrowing saturating shr by constant
+// widening shl by constant
+
+// shr and insert
+// shl and insert
+
+// loads and stores, single vector
+// loads and stores, lane
+// loads, dupe
+
+// loads and stores, arrays
+
+// vget,vgetq lane
+// vset, vsetq lane
+
+// vcreate
+// vdup, vdupq
+// vmov, vmovq
+// vdup_lane, vdupq_lane
+// vcombine
+// vget_high, vget_low
+
+// vcvt {u,s} <-> f, f <-> f16
+// narrow
+// long move (unpack)
+// saturating narrow
+// saturating narrow s->u
+
+// table lookup
+// extended table lookup
+
+// mla with scalar
+// widening mla with scalar
+// widening saturating doubling mla with scalar
+// mls with scalar
+// widening mls with scalar
+// widening saturating doubling mls with scalar
+// mul by scalar
+// long mul with scalar
+// long mul by scalar
+// saturating doubling long mul with scalar
+// saturating doubling long mul by scalar
+// saturating doubling mul high with scalar
+// saturating doubling mul high by scalar
+// saturating rounding doubling mul high with scalar
+// saturating rounding doubling mul high by scalar
+// mla with scalar
+// widening mla with scalar
+// widening saturating doubling mla with scalar
+// mls with scalar
+// widening mls with scalar
+// widening saturating doubling mls with scalar
+
+// extract
+
+// endian swap (vrev)
+
+// negate
+
+// abs
+// saturating abs
+// saturating negate
+// count leading sign bits
+INT_UNARY(vabs, __builtin_neon_vabs)
+FP_UNARY(vabs, __builtin_neon_vabs)
+INT_UNARY(vqabs, __builtin_neon_vqabs)
+INT_UNARY(vqneg, __builtin_neon_vqneg)
+INT_UNARY(vcls, __builtin_neon_vcls)
+
+// count leading zeroes
+// popcount
+
+// recip_est
+// recip_sqrt_est
+
+// not-poly
+// not
+
+// and
+// or
+// xor
+// andn
+// orn
+INT_LOGICAL_OP(vand, &)
+INT_LOGICAL_OP(vorr, |)
+INT_LOGICAL_OP(veor, ^)
+INT_LOGICAL_OP(vbic, &~)
+INT_LOGICAL_OP(vorn, |~)
+
+// bitselect
+
+// transpose elts
+// interleave elts
+// deinterleave elts
+
+// vreinterpret
+
+#endif /* __ARM_NEON_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
new file mode 100644
index 0000000..8afbe76
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
@@ -0,0 +1,1363 @@
+/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __EMMINTRIN_H +#define __EMMINTRIN_H + +#ifndef __SSE2__ +#error "SSE2 instruction set not enabled" +#else + +#include <xmmintrin.h> + +typedef double __m128d __attribute__((__vector_size__(16))); +typedef long long __m128i __attribute__((__vector_size__(16))); + +typedef short __v8hi __attribute__((__vector_size__(16))); +typedef char __v16qi __attribute__((__vector_size__(16))); + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_add_sd(__m128d a, __m128d b) +{ + a[0] += b[0]; + return a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_add_pd(__m128d a, __m128d b) +{ + return a + b; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_sub_sd(__m128d a, __m128d b) +{ + a[0] -= b[0]; + return a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pd(__m128d a, __m128d b) +{ + return a - b; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_mul_sd(__m128d a, __m128d b) +{ + a[0] *= b[0]; + return a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_mul_pd(__m128d a, __m128d b) +{ + return a * b; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_div_sd(__m128d a, __m128d b) +{ + a[0] /= b[0]; + return a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_div_pd(__m128d a, __m128d b) +{ + return a / b; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_sqrt_sd(__m128d a, __m128d b) +{ + __m128d c = __builtin_ia32_sqrtsd(b); + return (__m128d) { c[0], a[1] }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_sqrt_pd(__m128d a) +{ + return __builtin_ia32_sqrtpd(a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_min_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_minsd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_min_pd(__m128d a, __m128d b) +{ + return __builtin_ia32_minpd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_max_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_maxsd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_max_pd(__m128d a, __m128d b) +{ + return __builtin_ia32_maxpd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_and_pd(__m128d a, __m128d b) +{ + return (__m128d)((__v4si)a & (__v4si)b); +} + +static __inline__ 
__m128d __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_pd(__m128d a, __m128d b) +{ + return (__m128d)(~(__v4si)a & (__v4si)b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_or_pd(__m128d a, __m128d b) +{ + return (__m128d)((__v4si)a | (__v4si)b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_xor_pd(__m128d a, __m128d b) +{ + return (__m128d)((__v4si)a ^ (__v4si)b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 0); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(b, a, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(b, a, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 7); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 3); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 4); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(a, b, 6); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(b, a, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_pd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmppd(b, a, 6); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 0); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(b, a, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(b, a, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 7); +} + +static __inline__ __m128d 
__attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 3); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 4); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(a, b, 6); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(b, a, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_sd(__m128d a, __m128d b) +{ + return (__m128d)__builtin_ia32_cmpsd(b, a, 6); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comieq_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_comisdeq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comilt_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_comisdlt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comile_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_comisdle(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comigt_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_comisdgt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comineq_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_comisdneq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomieq_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_ucomisdeq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomilt_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_ucomisdlt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomile_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_ucomisdle(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomigt_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_ucomisdgt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomineq_sd(__m128d a, __m128d b) +{ + return __builtin_ia32_ucomisdneq(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpd_ps(__m128d a) +{ + return __builtin_ia32_cvtpd2ps(a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_pd(__m128 a) +{ + return __builtin_ia32_cvtps2pd(a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi32_pd(__m128i a) +{ + return __builtin_ia32_cvtdq2pd((__v4si)a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpd_epi32(__m128d a) +{ + return __builtin_ia32_cvtpd2dq(a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_si32(__m128d a) +{ + return __builtin_ia32_cvtsd2si(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_ss(__m128 a, __m128d b) +{ + a[0] = b[0]; + return a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_sd(__m128d a, int b) +{ 
+ a[0] = b; + return a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_sd(__m128d a, __m128 b) +{ + a[0] = b[0]; + return a; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvttpd_epi32(__m128d a) +{ + return (__m128i)__builtin_ia32_cvttpd2dq(a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvttsd_si32(__m128d a) +{ + return a[0]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpd_pi32(__m128d a) +{ + return (__m64)__builtin_ia32_cvtpd2pi(a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvttpd_pi32(__m128d a) +{ + return (__m64)__builtin_ia32_cvttpd2pi(a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi32_pd(__m64 a) +{ + return __builtin_ia32_cvtpi2pd((__v2si)a); +} + +static __inline__ double __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_f64(__m128d a) +{ + return a[0]; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_load_pd(double const *dp) +{ + return *(__m128d*)dp; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_load1_pd(double const *dp) +{ + return (__m128d){ dp[0], dp[0] }; +} + +#define _mm_load_pd1(dp) _mm_load1_pd(dp) + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadr_pd(double const *dp) +{ + return (__m128d){ dp[1], dp[0] }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadu_pd(double const *dp) +{ + return __builtin_ia32_loadupd(dp); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_load_sd(double const *dp) +{ + return (__m128d){ *dp, 0.0 }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadh_pd(__m128d a, double const *dp) +{ + return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadl_pd(__m128d a, double const *dp) +{ + return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_set_sd(double w) +{ + return (__m128d){ w, 0 }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pd(double w) +{ + return (__m128d){ w, w }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_set_pd(double w, double x) +{ + return (__m128d){ x, w }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pd(double w, double x) +{ + return (__m128d){ w, x }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_setzero_pd(void) +{ + return (__m128d){ 0, 0 }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_move_sd(__m128d a, __m128d b) +{ + return (__m128d){ b[0], a[1] }; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_sd(double *dp, __m128d a) +{ + dp[0] = a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store1_pd(double *dp, __m128d a) +{ + dp[0] = a[0]; + dp[1] = a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_pd(double *dp, __m128d a) +{ + *(__m128d *)dp = a; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeu_pd(double *dp, 
__m128d a) +{ + __builtin_ia32_storeupd(dp, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storer_pd(double *dp, __m128d a) +{ + dp[0] = a[1]; + dp[1] = a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeh_pd(double *dp, __m128d a) +{ + dp[0] = a[1]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storel_pd(double *dp, __m128d a) +{ + dp[0] = a[0]; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi8(__m128i a, __m128i b) +{ + return (__m128i)((__v16qi)a + (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi16(__m128i a, __m128i b) +{ + return (__m128i)((__v8hi)a + (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi32(__m128i a, __m128i b) +{ + return (__m128i)((__v4si)a + (__v4si)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_si64(__m64 a, __m64 b) +{ + return a + b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi64(__m128i a, __m128i b) +{ + return a + b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epi8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epu8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epu16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_avg_epu8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_avg_epu16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_madd_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epu8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epu8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
+_mm_mulhi_epu16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mullo_epi16(__m128i a, __m128i b) +{ + return (__m128i)((__v8hi)a * (__v8hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mul_su32(__m64 a, __m64 b) +{ + return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mul_epu32(__m128i a, __m128i b) +{ + return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sad_epu8(__m128i a, __m128i b) +{ + return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi8(__m128i a, __m128i b) +{ + return (__m128i)((__v16qi)a - (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi16(__m128i a, __m128i b) +{ + return (__m128i)((__v8hi)a - (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi32(__m128i a, __m128i b) +{ + return (__m128i)((__v4si)a - (__v4si)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_si64(__m64 a, __m64 b) +{ + return a - b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi64(__m128i a, __m128i b) +{ + return a - b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epi8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epu8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epu16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_and_si128(__m128i a, __m128i b) +{ + return a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_si128(__m128i a, __m128i b) +{ + return ~a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_or_si128(__m128i a, __m128i b) +{ + return a | b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_xor_si128(__m128i a, __m128i b) +{ + return a ^ b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_si128(__m128i a, int imm) +{ + return __builtin_ia32_pslldqi128(a, imm * 8); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_epi16(__m128i a, int count) +{ + return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sll_epi16(__m128i a, __m128i count) +{ + return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_epi32(__m128i a, int count) +{ + return 
(__m128i)__builtin_ia32_pslldi128((__v4si)a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sll_epi32(__m128i a, __m128i count) +{ + return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_epi64(__m128i a, int count) +{ + return __builtin_ia32_psllqi128(a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sll_epi64(__m128i a, __m128i count) +{ + return __builtin_ia32_psllq128(a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srai_epi16(__m128i a, int count) +{ + return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sra_epi16(__m128i a, __m128i count) +{ + return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srai_epi32(__m128i a, int count) +{ + return (__m128i)__builtin_ia32_psradi128((__v4si)a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sra_epi32(__m128i a, __m128i count) +{ + return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_si128(__m128i a, int imm) +{ + return __builtin_ia32_psrldqi128(a, imm * 8); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_epi16(__m128i a, int count) +{ + return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srl_epi16(__m128i a, __m128i count) +{ + return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_epi32(__m128i a, int count) +{ + return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srl_epi32(__m128i a, __m128i count) +{ + return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_epi64(__m128i a, int count) +{ + return __builtin_ia32_psrlqi128(a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srl_epi64(__m128i a, __m128i count) +{ + return __builtin_ia32_psrlq128(a, count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi8(__m128i a, __m128i b) +{ + return (__m128i)((__v16qi)a == (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi16(__m128i a, __m128i b) +{ + return (__m128i)((__v8hi)a == (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi32(__m128i a, __m128i b) +{ + return (__m128i)((__v4si)a == (__v4si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi8(__m128i a, __m128i b) +{ + return (__m128i)((__v16qi)a > (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi16(__m128i a, __m128i b) +{ + return (__m128i)((__v8hi)a > (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi32(__m128i a, __m128i b) +{ + return 
(__m128i)((__v4si)a > (__v4si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi8(__m128i a, __m128i b) +{ + return _mm_cmpgt_epi8(b,a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi16(__m128i a, __m128i b) +{ + return _mm_cmpgt_epi16(b,a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi32(__m128i a, __m128i b) +{ + return _mm_cmpgt_epi32(b,a); +} + +#ifdef __x86_64__ +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_sd(__m128d a, long long b) +{ + a[0] = b; + return a; +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_si64(__m128d a) +{ + return __builtin_ia32_cvtsd2si64(a); +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvttsd_si64(__m128d a) +{ + return a[0]; +} +#endif + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi32_ps(__m128i a) +{ + return __builtin_ia32_cvtdq2ps((__v4si)a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_epi32(__m128 a) +{ + return (__m128i)__builtin_ia32_cvtps2dq(a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvttps_epi32(__m128 a) +{ + return (__m128i)__builtin_ia32_cvttps2dq(a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_si128(int a) +{ + return (__m128i)(__v4si){ a, 0, 0, 0 }; +} + +#ifdef __x86_64__ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_si128(long long a) +{ + return (__m128i){ a, 0 }; +} +#endif + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi128_si32(__m128i a) +{ + __v4si b = (__v4si)a; + return b[0]; +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi128_si64(__m128i a) +{ + return a[0]; +} +#endif + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_load_si128(__m128i const *p) +{ + return *p; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_loadu_si128(__m128i const *p) +{ + return (__m128i)__builtin_ia32_loaddqu((char const *)p); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_loadl_epi64(__m128i const *p) +{ + return (__m128i) { *(long long*)p, 0}; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi64x(long long q1, long long q0) +{ + return (__m128i){ q0, q1 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi64(__m64 q1, __m64 q0) +{ + return (__m128i){ (long long)q0, (long long)q1 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi32(int i3, int i2, int i1, int i0) +{ + return (__m128i)(__v4si){ i0, i1, i2, i3}; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0) +{ + return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) +{ + return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, 
b8, b9, b10, b11, b12, b13, b14, b15 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi64x(long long q) +{ + return (__m128i){ q, q }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi64(__m64 q) +{ + return (__m128i){ (long long)q, (long long)q }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi32(int i) +{ + return (__m128i)(__v4si){ i, i, i, i }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi16(short w) +{ + return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi8(char b) +{ + return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi64(__m64 q0, __m64 q1) +{ + return (__m128i){ (long long)q0, (long long)q1 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi32(int i0, int i1, int i2, int i3) +{ + return (__m128i)(__v4si){ i0, i1, i2, i3}; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7) +{ + return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15) +{ + return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setzero_si128(void) +{ + return (__m128i){ 0LL, 0LL }; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_si128(__m128i *p, __m128i b) +{ + *p = b; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeu_si128(__m128i *p, __m128i b) +{ + __builtin_ia32_storedqu((char *)p, (__v16qi)b); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_maskmoveu_si128(__m128i d, __m128i n, char *p) +{ + __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storel_epi64(__m128i *p, __m128i a) +{ + __builtin_ia32_storelv4si((__v2si *)p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_pd(double *p, __m128d a) +{ + __builtin_ia32_movntpd(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si128(__m128i *p, __m128i a) +{ + __builtin_ia32_movntdq(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si32(int *p, int a) +{ + __builtin_ia32_movnti(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_clflush(void const *p) +{ + __builtin_ia32_clflush(p); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_lfence(void) +{ + __builtin_ia32_lfence(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_mfence(void) +{ + __builtin_ia32_mfence(); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packs_epi16(__m128i a, __m128i b) +{ + return 
(__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packs_epi32(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packus_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_extract_epi16(__m128i a, int imm) +{ + __v8hi b = (__v8hi)a; + return b[imm]; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_insert_epi16(__m128i a, int b, int imm) +{ + __v8hi c = (__v8hi)a; + c[imm & 7] = b; + return (__m128i)c; +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_movemask_epi8(__m128i a) +{ + return __builtin_ia32_pmovmskb128((__v16qi)a); +} + +#define _mm_shuffle_epi32(a, imm) \ + ((__m128i)__builtin_shufflevector((__v4si)(a), (__v4si) {0}, \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6)) +#define _mm_shufflelo_epi16(a, imm) \ + ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) {0}, \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4, 5, 6, 7)) +#define _mm_shufflehi_epi16(a, imm) \ + ((__m128i)__builtin_shufflevector((__v8hi)(a), (__v8hi) {0}, 0, 1, 2, 3, \ + 4 + ((imm) & 0x3), 4 + ((imm) & 0xc) >> 2, \ + 4 + ((imm) & 0x30) >> 4, \ + 4 + ((imm) & 0xc0) >> 6)) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi32(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi64(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector(a, b, 1, 2+1); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi32(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi64(__m128i a, __m128i b) +{ + return (__m128i)__builtin_shufflevector(a, b, 0, 2+0); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_movepi64_pi64(__m128i a) +{ + return (__m64)a[0]; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_movpi64_pi64(__m64 a) 
+{
+  return (__m128i){ (long long)a, 0 };
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_move_epi64(__m128i a)
+{
+  return __builtin_shufflevector(a, (__m128i){ 0 }, 0, 2);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_unpackhi_pd(__m128d a, __m128d b)
+{
+  return __builtin_shufflevector(a, b, 1, 2+1);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_unpacklo_pd(__m128d a, __m128d b)
+{
+  return __builtin_shufflevector(a, b, 0, 2+0);
+}
+
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_mm_movemask_pd(__m128d a)
+{
+  return __builtin_ia32_movmskpd(a);
+}
+
+#define _mm_shuffle_pd(a, b, i) \
+  (__builtin_shufflevector((__m128d)(a), (__m128d)(b), (i) & 1, \
+                           (((i) & 2) >> 1) + 2))
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_castpd_ps(__m128d in)
+{
+  return (__m128)in;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_castpd_si128(__m128d in)
+{
+  return (__m128i)in;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_castps_pd(__m128 in)
+{
+  return (__m128d)in;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_castps_si128(__m128 in)
+{
+  return (__m128i)in;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_castsi128_ps(__m128i in)
+{
+  return (__m128)in;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_castsi128_pd(__m128i in)
+{
+  return (__m128d)in;
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_pause(void)
+{
+  __asm__ volatile ("pause");
+}
+
+#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
+
+#endif /* __SSE2__ */
+
+#endif /* __EMMINTRIN_H */
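Taken together, the double-precision half of this header maps one C function or macro onto each SSE2 instruction. A minimal usage sketch, illustrative only and not part of the commit, built with -msse2 on an SSE2-capable target:

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  /* _mm_set_pd lists lanes high-to-low, so this vector holds {1.0, 2.0}. */
  __m128d a = _mm_set_pd(2.0, 1.0);
  __m128d b = _mm_set1_pd(10.0);        /* {10.0, 10.0} */
  double out[2];
  _mm_storeu_pd(out, _mm_add_pd(a, b)); /* unaligned store of the sum */
  printf("%g %g\n", out[0], out[1]);    /* prints: 11 12 */
  return 0;
}

The integer forms follow the same split visible above: plain operators where GCC-style vector extensions suffice (_mm_add_epi32), and __builtin_ia32_* calls where saturation or other instruction-specific behavior is required (_mm_adds_epi16).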
diff --git a/contrib/llvm/tools/clang/lib/Headers/float.h b/contrib/llvm/tools/clang/lib/Headers/float.h
new file mode 100644
index 0000000..28fb882
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/float.h
@@ -0,0 +1,71 @@
+/*===---- float.h - Characteristics of floating point types ----------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __FLOAT_H
+#define __FLOAT_H
+
+/* Characteristics of floating point types, C99 5.2.4.2.2 */
+
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
+#define FLT_ROUNDS (__builtin_flt_rounds())
+#define FLT_RADIX __FLT_RADIX__
+
+#define FLT_MANT_DIG __FLT_MANT_DIG__
+#define DBL_MANT_DIG __DBL_MANT_DIG__
+#define LDBL_MANT_DIG __LDBL_MANT_DIG__
+
+#define DECIMAL_DIG __DECIMAL_DIG__
+
+#define FLT_DIG __FLT_DIG__
+#define DBL_DIG __DBL_DIG__
+#define LDBL_DIG __LDBL_DIG__
+
+#define FLT_MIN_EXP __FLT_MIN_EXP__
+#define DBL_MIN_EXP __DBL_MIN_EXP__
+#define LDBL_MIN_EXP __LDBL_MIN_EXP__
+
+#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__
+#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__
+#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
+
+#define FLT_MAX_EXP __FLT_MAX_EXP__
+#define DBL_MAX_EXP __DBL_MAX_EXP__
+#define LDBL_MAX_EXP __LDBL_MAX_EXP__
+
+#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__
+#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__
+#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
+
+#define FLT_MAX __FLT_MAX__
+#define DBL_MAX __DBL_MAX__
+#define LDBL_MAX __LDBL_MAX__
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+#define LDBL_EPSILON __LDBL_EPSILON__
+
+#define FLT_MIN __FLT_MIN__
+#define DBL_MIN __DBL_MIN__
+#define LDBL_MIN __LDBL_MIN__
+
+#endif /* __FLOAT_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/iso646.h b/contrib/llvm/tools/clang/lib/Headers/iso646.h
new file mode 100644
index 0000000..dca13c5
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/iso646.h
@@ -0,0 +1,43 @@
+/*===---- iso646.h - Standard header for alternate spellings of operators---===
+ *
+ * Copyright (c) 2008 Eli Friedman
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ISO646_H
+#define __ISO646_H
+
+#ifndef __cplusplus
+#define and &&
+#define and_eq &=
+#define bitand &
+#define bitor |
+#define compl ~
+#define not !
+#define not_eq != +#define or || +#define or_eq |= +#define xor ^ +#define xor_eq ^= +#endif + +#endif /* __ISO646_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/limits.h b/contrib/llvm/tools/clang/lib/Headers/limits.h new file mode 100644 index 0000000..2627533 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/limits.h @@ -0,0 +1,113 @@ +/*===---- limits.h - Standard header for integer sizes --------------------===*\ + * + * Copyright (c) 2009 Chris Lattner + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_LIMITS_H +#define __CLANG_LIMITS_H + +/* The system's limits.h may, in turn, try to #include_next GCC's limits.h. + Avert this #include_next madness. */ +#if defined __GNUC__ && !defined _GCC_LIMITS_H_ +#define _GCC_LIMITS_H_ +#endif + +/* System headers include a number of constants from POSIX in <limits.h>. */ +#include_next <limits.h> + +/* Many system headers try to "help us out" by defining these. No really, we + know how big each datatype is. */ +#undef SCHAR_MIN +#undef SCHAR_MAX +#undef UCHAR_MAX +#undef SHRT_MIN +#undef SHRT_MAX +#undef USHRT_MAX +#undef INT_MIN +#undef INT_MAX +#undef UINT_MAX +#undef LONG_MIN +#undef LONG_MAX +#undef ULONG_MAX + +#undef CHAR_BIT +#undef CHAR_MIN +#undef CHAR_MAX + +/* C90/99 5.2.4.2.1 */ +#define SCHAR_MAX __SCHAR_MAX__ +#define SHRT_MAX __SHRT_MAX__ +#define INT_MAX __INT_MAX__ +#define LONG_MAX __LONG_MAX__ + +#define SCHAR_MIN (-__SCHAR_MAX__-1) +#define SHRT_MIN (-__SHRT_MAX__ -1) +#define INT_MIN (-__INT_MAX__ -1) +#define LONG_MIN (-__LONG_MAX__ -1L) + +#define UCHAR_MAX (__SCHAR_MAX__*2 +1) +#define USHRT_MAX (__SHRT_MAX__ *2 +1) +#define UINT_MAX (__INT_MAX__ *2U +1U) +#define ULONG_MAX (__LONG_MAX__ *2UL+1UL) + +#ifndef MB_LEN_MAX +#define MB_LEN_MAX 1 +#endif + +#define CHAR_BIT __CHAR_BIT__ + +#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */ +#define CHAR_MIN 0 +#define CHAR_MAX UCHAR_MAX +#else +#define CHAR_MIN SCHAR_MIN +#define CHAR_MAX __SCHAR_MAX__ +#endif + +/* C99 5.2.4.2.1: Added long long. */ +#if __STDC_VERSION__ >= 199901 + +#undef LLONG_MIN +#undef LLONG_MAX +#undef ULLONG_MAX + +#define LLONG_MAX __LONG_LONG_MAX__ +#define LLONG_MIN (-__LONG_LONG_MAX__-1LL) +#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) +#endif + +/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. 
It's too bad
+   that we don't have something like #pragma poison that could be used to
+   deprecate a macro - the code should just use LLONG_MAX and friends.
+ */
+#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)
+
+#undef LONG_LONG_MIN
+#undef LONG_LONG_MAX
+#undef ULONG_LONG_MAX
+
+#define LONG_LONG_MAX __LONG_LONG_MAX__
+#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)
+#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)
+#endif
+
+#endif /* __CLANG_LIMITS_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/mm_malloc.h b/contrib/llvm/tools/clang/lib/Headers/mm_malloc.h
new file mode 100644
index 0000000..fba8651
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/mm_malloc.h
@@ -0,0 +1,62 @@
+/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __MM_MALLOC_H
+#define __MM_MALLOC_H
+
+#include <errno.h>
+#include <stdlib.h>
+
+static __inline__ void *__attribute__((__always_inline__, __nodebug__))
+_mm_malloc(size_t size, size_t align)
+{
+  if (align & (align - 1)) {
+    errno = EINVAL;
+    return 0;
+  }
+
+  if (!size)
+    return 0;
+
+  if (align < 2 * sizeof(void *))
+    align = 2 * sizeof(void *);
+
+  void *mallocedMemory = malloc(size + align);
+  if (!mallocedMemory)
+    return 0;
+
+  void *alignedMemory =
+    (void *)(((size_t)mallocedMemory + align) & ~((size_t)align - 1));
+  ((void **)alignedMemory)[-1] = mallocedMemory;
+
+  return alignedMemory;
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_free(void *p)
+{
+  if (p)
+    free(((void **)p)[-1]);
+}
+
+#endif /* __MM_MALLOC_H */
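The allocator above over-allocates by align bytes, rounds the raw malloc result up to the next multiple of align, and stashes the raw pointer in the slot immediately below the returned block; that stashed pointer is what _mm_free digs back out. A short usage sketch, illustrative only and not part of the commit:

#include <stdio.h>
#include <mm_malloc.h>

int main(void) {
  /* Request 64 floats on a 16-byte boundary, as aligned SSE loads require. */
  float *buf = _mm_malloc(64 * sizeof(float), 16);
  if (!buf)
    return 1;
  printf("aligned: %d\n", ((size_t)buf % 16) == 0);  /* prints: aligned: 1 */
  _mm_free(buf);  /* must pair with _mm_malloc, never plain free() */
  return 0;
}

Bumping align up to 2 * sizeof(void *) is what guarantees there is always room for the bookkeeping pointer below the aligned address, since malloc results are themselves at least that strongly aligned.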
diff --git a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
new file mode 100644
index 0000000..401d8a7
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
@@ -0,0 +1,449 @@
+/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __MMINTRIN_H +#define __MMINTRIN_H + +#ifndef __MMX__ +#error "MMX instruction set not enabled" +#else + +typedef long long __m64 __attribute__((__vector_size__(8))); + +typedef int __v2si __attribute__((__vector_size__(8))); +typedef short __v4hi __attribute__((__vector_size__(8))); +typedef char __v8qi __attribute__((__vector_size__(8))); + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_empty(void) +{ + __builtin_ia32_emms(); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_si64(int __i) +{ + return (__m64)(__v2si){__i, 0}; +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_si32(__m64 __m) +{ + __v2si __mmx_var2 = (__v2si)__m; + return __mmx_var2[0]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_m64(long long __i) +{ + return (__m64)__i; +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtm64_si64(__m64 __m) +{ + return (long long)__m; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_packs_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_packs_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_packs_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 8+4, 5, + 8+5, 6, 8+6, 7, 8+7); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 4+2, 3, + 4+3); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 2+1); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8+0, 1, + 8+1, 2, 8+2, 3, 8+3); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4+0, 1, + 4+1); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2+0); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v8qi)__m1 + (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v4hi)__m1 + (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v2si)__m1 + (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, 
__nodebug__)) +_mm_adds_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_adds_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_adds_pu8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_adds_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v8qi)__m1 - (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v4hi)__m1 - (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v2si)__m1 - (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pu8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_madd_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mullo_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v4hi)__m1 * (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sll_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_slli_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sll_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_slli_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sll_si64(__m64 __m, __m64 __count) +{ + return __builtin_ia32_psllq(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_slli_si64(__m64 __m, int __count) +{ + return __builtin_ia32_psllqi(__m, __count); +} + +static __inline__ 
__m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sra_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srai_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sra_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srai_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srl_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srli_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srl_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srli_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srl_si64(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrlq(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srli_si64(__m64 __m, int __count) +{ + return __builtin_ia32_psrlqi(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_and_si64(__m64 __m1, __m64 __m2) +{ + return __m1 & __m2; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_si64(__m64 __m1, __m64 __m2) +{ + return ~__m1 & __m2; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_or_si64(__m64 __m1, __m64 __m2) +{ + return __m1 | __m2; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_xor_si64(__m64 __m1, __m64 __m2) +{ + return __m1 ^ __m2; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v8qi)__m1 == (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v4hi)__m1 == (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v2si)__m1 == (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v8qi)__m1 > (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v4hi)__m1 > (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)((__v2si)__m1 > (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setzero_si64(void) +{ + return (__m64){ 0LL }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 
+_mm_set_pi32(int __i1, int __i0) +{ + return (__m64)(__v2si){ __i0, __i1 }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) +{ + return (__m64)(__v4hi){ __s0, __s1, __s2, __s3 }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, + char __b1, char __b0) +{ + return (__m64)(__v8qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7 }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pi32(int __i) +{ + return (__m64)(__v2si){ __i, __i }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pi16(short __s) +{ + return (__m64)(__v4hi){ __s, __s, __s, __s }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pi8(char __b) +{ + return (__m64)(__v8qi){ __b, __b, __b, __b, __b, __b, __b, __b }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pi32(int __i1, int __i0) +{ + return (__m64)(__v2si){ __i1, __i0 }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pi16(short __s3, short __s2, short __s1, short __s0) +{ + return (__m64)(__v4hi){ __s3, __s2, __s1, __s0 }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, + char __b1, char __b0) +{ + return (__m64)(__v8qi){ __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0 }; +} + +#endif /* __MMX__ */ + +#endif /* __MMINTRIN_H */ + diff --git a/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h new file mode 100644 index 0000000..cc213ce --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h @@ -0,0 +1,35 @@ +/*===---- nmmintrin.h - SSE4.2 intrinsics ----------------------------------=== +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +* +*===-----------------------------------------------------------------------=== +*/ + +#ifndef _NMMINTRIN_H +#define _NMMINTRIN_H + +#ifndef __SSE4_2__ +#error "SSE4.2 instruction set not enabled" +#else + +/* To match the expectations of gcc, the SSE4.2 definitions live in + smmintrin.h, so we simply include that header here. 
*/ +#include <smmintrin.h> +#endif /* __SSE4_2__ */ +#endif /* _NMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h new file mode 100644 index 0000000..7ca386c --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h @@ -0,0 +1,121 @@ +/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __PMMINTRIN_H +#define __PMMINTRIN_H + +#ifndef __SSE3__ +#error "SSE3 instruction set not enabled" +#else + +#include <emmintrin.h> + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_lddqu_si128(__m128i const *p) +{ + return (__m128i)__builtin_ia32_lddqu((char const *)p); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_addsub_ps(__m128 a, __m128 b) +{ + return __builtin_ia32_addsubps(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_ps(__m128 a, __m128 b) +{ + return __builtin_ia32_haddps(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_ps(__m128 a, __m128 b) +{ + return __builtin_ia32_hsubps(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_movehdup_ps(__m128 a) +{ + return __builtin_shufflevector(a, a, 1, 1, 3, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_moveldup_ps(__m128 a) +{ + return __builtin_shufflevector(a, a, 0, 0, 2, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_addsub_pd(__m128d a, __m128d b) +{ + return __builtin_ia32_addsubpd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_pd(__m128d a, __m128d b) +{ + return __builtin_ia32_haddpd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_pd(__m128d a, __m128d b) +{ + return __builtin_ia32_hsubpd(a, b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loaddup_pd(double const *dp) +{ + return (__m128d){ *dp, *dp }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_movedup_pd(__m128d a) +{ + return __builtin_shufflevector(a, a, 0, 0); +} + +#define _MM_DENORMALS_ZERO_ON (0x0040) +#define _MM_DENORMALS_ZERO_OFF (0x0000) + 
+#define _MM_DENORMALS_ZERO_MASK (0x0040) + +#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) +#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_monitor(void const *p, unsigned extensions, unsigned hints) +{ + __builtin_ia32_monitor((void *)p, extensions, hints); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_mwait(unsigned extensions, unsigned hints) +{ + __builtin_ia32_mwait(extensions, hints); +} + +#endif /* __SSE3__ */ + +#endif /* __PMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h new file mode 100644 index 0000000..e271f99 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h @@ -0,0 +1,454 @@ +/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _SMMINTRIN_H +#define _SMMINTRIN_H + +#ifndef __SSE4_1__ +#error "SSE4.1 instruction set not enabled" +#else + +#include <tmmintrin.h> + +/* Type defines. */ +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); + +/* SSE4 Rounding macros. 
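*/

/* Editorial aside -- not part of the original header.  A short worked
 * example of how the rounding macros below compose, since the bit encoding
 * is easy to misread: _MM_FROUND_FLOOR is _MM_FROUND_RAISE_EXC (0x00) |
 * _MM_FROUND_TO_NEG_INF (0x01) = 0x01, so
 *
 *   _mm_floor_ps(v)
 *     => _mm_round_ps((v), _MM_FROUND_FLOOR)
 *     => __builtin_ia32_roundps((v), 0x01)
 *
 * i.e. ROUNDPS with imm8 = 0x01: round toward negative infinity without
 * suppressing precision exceptions.  _MM_FROUND_NEARBYINT, by contrast,
 * includes _MM_FROUND_NO_EXC (0x08) with _MM_FROUND_CUR_DIRECTION (0x04),
 * giving imm8 = 0x0c.
 */

/*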
*/ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) +#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) +#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) +#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) +#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) +#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) + +#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) +#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) +#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) +#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) + +#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) +#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) +#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) +#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) + +#define _mm_round_ps(X, Y) __builtin_ia32_roundps((X), (Y)) +#define _mm_round_ss(X, Y, M) __builtin_ia32_roundss((X), (Y), (M)) +#define _mm_round_pd(X, M) __builtin_ia32_roundpd((X), (M)) +#define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M)) + +/* SSE4 Packed Blending Intrinsics. */ +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_blend_pd (__m128d __V1, __m128d __V2, const int __M) +{ + return (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, __M); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_blend_ps (__m128 __V1, __m128 __V2, const int __M) +{ + return (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, __M); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) +{ + return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, + (__v2df)__M); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) +{ + return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, + (__v4sf)__M); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) +{ + return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, + (__v16qi)__M); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_blend_epi16 (__m128i __V1, __m128i __V2, const int __M) +{ + return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, __M); +} + +/* SSE4 Dword Multiply Instructions. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mullo_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) ((__v4si)__V1 * (__v4si)__V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mul_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); +} + +/* SSE4 Floating Point Dot Product Instructions. */ +#define _mm_dp_ps(X, Y, M) __builtin_ia32_dpps ((X), (Y), (M)) +#define _mm_dp_pd(X, Y, M) __builtin_ia32_dppd ((X), (Y), (M)) + +/* SSE4 Streaming Load Hint Instruction. 
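*/

/* Editorial aside -- not part of the original header.  A hedged usage
 * sketch for the _mm_dp_ps dot-product mask defined above; the helper name
 * __example_dot4 is ours, and we assume _mm_cvtss_f32 from xmmintrin.h is
 * available through the include chain.  In the 0xF1 mask, the high nibble
 * (0xF) selects all four lanes of each input for the multiplies, and the
 * low nibble (0x1) broadcasts the sum only into lane 0 of the result.
 */
static __inline__ float __attribute__((__always_inline__, __nodebug__))
__example_dot4(__m128 __a, __m128 __b)
{
  /* Lane 0 of the result holds a0*b0 + a1*b1 + a2*b2 + a3*b3. */
  return _mm_cvtss_f32(_mm_dp_ps(__a, __b, 0xF1));
}

/*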
*/ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_stream_load_si128 (__m128i *__V) +{ + return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V); +} + +/* SSE4 Packed Integer Min/Max Instructions. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epi8 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epi8 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epu16 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epu16 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epu32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epu32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); +} + +/* SSE4 Insertion and Extraction from XMM Register Instructions. */ +#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) +#define _mm_extract_ps(X, N) (__extension__ \ + ({ union { int i; float f; } __t; \ + __v4sf __a = (__v4sf)X; \ + __t.f = __a[N]; \ + __t.i;})) + +/* Miscellaneous insert and extract macros. */ +/* Extract a single-precision float from X at index N into D. */ +#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)X; \ + (D) = __a[N]; })) + +/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create + an index suitable for _mm_insert_ps. */ +#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) + +/* Extract a float from X at index N into the first index of the return. */ +#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ + _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) + +/* Insert int into packed integer array at index. */ +#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)X; \ + __a[N] = I; \ + __a;})) +#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)X; \ + __a[N] = I; \ + __a;})) +#ifdef __x86_64__ +#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)X; \ + __a[N] = I; \ + __a;})) +#endif /* __x86_64__ */ + +/* Extract int from packed integer array at index. 
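*/

/* Editorial aside -- not part of the original header.  A minimal sketch of
 * the insert side defined above (the matching extract forms follow); the
 * helper name __example_set_lane is ours.  Note that _mm_insert_epi32 is a
 * statement expression yielding a __v4si, so the result is cast back to
 * __m128i.
 */
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
__example_set_lane(__m128i __x)
{
  /* Overwrite 32-bit lane 2 of __x with the constant 42. */
  return (__m128i)_mm_insert_epi32(__x, 42, 2);
}

/*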
*/ +#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)X; \ + __a[N];})) +#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)X; \ + __a[N];})) +#ifdef __x86_64__ +#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)X; \ + __a[N];})) +#endif /* __x86_64__ */ + +/* SSE4 128-bit Packed Integer Comparisons. */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_testz_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_testc_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_testnzc_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); +} + +#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) +#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) +#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) + +/* SSE4 64-bit Packed Integer Comparisons. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2); +} + +/* SSE4 Packed Integer Sign-Extension. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi8_epi16(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi8_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi8_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi16_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi16_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi32_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V); +} + +/* SSE4 Packed Integer Zero-Extension. 
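*/

/* Editorial aside -- not part of the original header.  A small usage sketch
 * for the PTEST-based predicates defined above; the helper name
 * __example_is_all_zero is ours.  _mm_test_all_zeros(M, V) sets ZF from
 * (V & M) == 0, so masking a vector against itself asks whether every bit
 * of it is clear.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__example_is_all_zero(__m128i __v)
{
  /* Returns non-zero iff every bit of __v is zero. */
  return _mm_test_all_zeros(__v, __v);
}

/*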
*/ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu8_epi16(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu8_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu8_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu16_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu16_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu32_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); +} + +/* SSE4 Pack with Unsigned Saturation. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packus_epi32(__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); +} + +/* SSE4 Multiple Packed Sums of Absolute Difference. */ +#define _mm_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw128((X), (Y), (M)) + +/* These definitions are normally in nmmintrin.h, but gcc puts them here, + so we'll do the same. */ +#ifdef __SSE4_2__ + +/* These specify the type of data that we're comparing. */ +#define _SIDD_UBYTE_OPS                 0x00 +#define _SIDD_UWORD_OPS                 0x01 +#define _SIDD_SBYTE_OPS                 0x02 +#define _SIDD_SWORD_OPS                 0x03 + +/* These specify the type of comparison operation. */ +#define _SIDD_CMP_EQUAL_ANY             0x00 +#define _SIDD_CMP_RANGES                0x04 +#define _SIDD_CMP_EQUAL_EACH            0x08 +#define _SIDD_CMP_EQUAL_ORDERED         0x0c + +/* These macros specify the polarity of the operation. */ +#define _SIDD_POSITIVE_POLARITY         0x00 +#define _SIDD_NEGATIVE_POLARITY         0x10 +#define _SIDD_MASKED_POSITIVE_POLARITY  0x20 +#define _SIDD_MASKED_NEGATIVE_POLARITY  0x30 + +/* These macros are used in _mm_cmpXstri() to specify the return. */ +#define _SIDD_LEAST_SIGNIFICANT         0x00 +#define _SIDD_MOST_SIGNIFICANT          0x40 + +/* These macros are used in _mm_cmpXstrm() to specify the return. */ +#define _SIDD_BIT_MASK                  0x00 +#define _SIDD_UNIT_MASK                 0x40 + +/* SSE4.2 Packed Comparison Intrinsics. */ +#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) +#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) + +#define _mm_cmpestrm(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestri(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) + +/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. 
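*/

/* Editorial aside -- not part of the original header.  A hedged sketch of
 * the implicit-length comparison form defined above; the helper name
 * __example_find_comma is ours.  With _SIDD_CMP_EQUAL_ANY, the first
 * operand is the character set and the returned index refers to the second
 * operand; 16 signals that no match was found.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__example_find_comma(__m128i __chunk)
{
  /* Index (0..15) of the first ',' in __chunk, or 16 if none occurs
     before the chunk's terminating zero byte. */
  return _mm_cmpistri(_mm_set1_epi8(','), __chunk,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                      _SIDD_LEAST_SIGNIFICANT);
}

/*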
*/ +#define _mm_cmpistra(A, B, M) \ + __builtin_ia32_pcmpistria128((A), (B), (M)) +#define _mm_cmpistrc(A, B, M) \ + __builtin_ia32_pcmpistric128((A), (B), (M)) +#define _mm_cmpistro(A, B, M) \ + __builtin_ia32_pcmpistrio128((A), (B), (M)) +#define _mm_cmpistrs(A, B, M) \ + __builtin_ia32_pcmpistris128((A), (B), (M)) +#define _mm_cmpistrz(A, B, M) \ + __builtin_ia32_pcmpistriz128((A), (B), (M)) + +#define _mm_cmpestra(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestrc(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestro(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestrs(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestrz(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M)) + +/* SSE4.2 Compare Packed Data -- Greater Than. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) +{ + return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2); +} + +/* SSE4.2 Accumulate CRC32. */ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u8(unsigned int __C, unsigned char __D) +{ + return __builtin_ia32_crc32qi(__C, __D); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u16(unsigned int __C, unsigned short __D) +{ + return __builtin_ia32_crc32hi(__C, __D); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u32(unsigned int __C, unsigned int __D) +{ + return __builtin_ia32_crc32si(__C, __D); +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u64(unsigned long long __C, unsigned long long __D) +{ + return __builtin_ia32_crc32di(__C, __D); +} +#endif /* __x86_64__ */ + +/* SSE4.2 Population Count. */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_popcnt_u32(unsigned int __A) +{ + return __builtin_popcount(__A); +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_popcnt_u64(unsigned long long __A) +{ + return __builtin_popcountll(__A); +} +#endif /* __x86_64__ */ + +#endif /* __SSE4_2__ */ +#endif /* __SSE4_1__ */ + +#endif /* _SMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/stdarg.h b/contrib/llvm/tools/clang/lib/Headers/stdarg.h new file mode 100644 index 0000000..c36ab12 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/stdarg.h @@ -0,0 +1,50 @@ +/*===---- stdarg.h - Variable argument handling ----------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDARG_H +#define __STDARG_H + +#ifndef _VA_LIST +typedef __builtin_va_list va_list; +#define _VA_LIST +#endif +#define va_start(ap, param) __builtin_va_start(ap, param) +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) __builtin_va_arg(ap, type) + +/* GCC always defines __va_copy, but only defines va_copy when compiling in + * C99 mode or when -ansi is not specified, since it was not part of C90. + */ +#define __va_copy(d,s) __builtin_va_copy(d,s) + +#if __STDC_VERSION__ >= 199900L || !defined(__STRICT_ANSI__) +#define va_copy(dest, src) __builtin_va_copy(dest, src) +#endif + +/* Hack required to make standard headers work, at least on Ubuntu */ +#define __GNUC_VA_LIST 1 +typedef __builtin_va_list __gnuc_va_list; + +#endif /* __STDARG_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/stdbool.h b/contrib/llvm/tools/clang/lib/Headers/stdbool.h new file mode 100644 index 0000000..e44a1f9 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/stdbool.h @@ -0,0 +1,38 @@ +/*===---- stdbool.h - Standard header for booleans -------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDBOOL_H +#define __STDBOOL_H + +/* Don't define bool, true, and false in C++ */ +#ifndef __cplusplus +#define bool _Bool +#define true 1 +#define false 0 +#endif + +#define __bool_true_false_are_defined 1 + +#endif /* __STDBOOL_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/stddef.h b/contrib/llvm/tools/clang/lib/Headers/stddef.h new file mode 100644 index 0000000..6868ad3 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/stddef.h @@ -0,0 +1,50 @@ +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDDEF_H +#define __STDDEF_H + +typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t; +#ifndef _SIZE_T +#define _SIZE_T +typedef __typeof__(sizeof(int)) size_t; +#endif +#ifndef __cplusplus +#ifndef _WCHAR_T +#define _WCHAR_T +typedef __typeof__(*L"") wchar_t; +#endif +#endif + +#undef NULL +#ifdef __cplusplus +#define NULL __null +#else +#define NULL ((void*)0) +#endif + +#define offsetof(t, d) __builtin_offsetof(t, d) + +#endif /* __STDDEF_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/stdint.h b/contrib/llvm/tools/clang/lib/Headers/stdint.h new file mode 100644 index 0000000..1785f31 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/stdint.h @@ -0,0 +1,654 @@ +/*===---- stdint.h - Standard header for sized integer types --------------===*\ + * + * Copyright (c) 2009 Chris Lattner + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_STDINT_H +#define __CLANG_STDINT_H + +/* If we're hosted, fall back to the system's stdint.h, which might have + * additional definitions. + */ +#if __STDC_HOSTED__ && \ + defined(__has_include_next) && __has_include_next(<stdint.h>) +# include_next <stdint.h> +#else + +/* C99 7.18.1.1 Exact-width integer types. + * C99 7.18.1.2 Minimum-width integer types. + * C99 7.18.1.3 Fastest minimum-width integer types. + * + * The standard requires that exact-width types be defined for 8-, 16-, 32-, and + * 64-bit types if they are implemented. Other exact width types are optional. + * This implementation defines an exact-width type for every integer width + * that is represented in the standard integer types. + * + * The standard also requires minimum-width types be defined for 8-, 16-, 32-, + * and 64-bit widths regardless of whether there are corresponding exact-width + * types. + * + * To accommodate targets that are missing types that are exactly 8, 16, 32, or + * 64 bits wide, this implementation takes an approach of cascading + * redefinitions, redefining __int_leastN_t to successively smaller exact-width + * types. It is therefore important that the types are defined in order of + * descending widths. + * + * We currently assume that the minimum-width types and the fastest + * minimum-width types are the same. This is allowed by the standard, but is + * suboptimal. + * + * In violation of the standard, some targets do not implement a type that is + * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit). + * To accommodate these targets, a required minimum-width type is only + * defined if there exists an exact-width type of equal or greater width. 
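*/

/* Editorial aside -- not part of the original header: a worked example of
 * the cascading redefinition described above.  Assume a hypothetical target
 * whose only standard integer type is 64 bits wide, so that only
 * __INT64_TYPE__ is defined.  The 64-bit block below then maps every
 * __[u]int_leastN_t macro down the cascade:
 *
 *   __int_least64_t -> int64_t
 *   __int_least32_t -> int64_t
 *   __int_least16_t -> int64_t
 *   __int_least8_t  -> int64_t
 *
 * so int_least8_t, int_fast8_t, and friends all end up 64 bits wide, which
 * is exactly the widest-to-narrowest fallback promised above.  On a normal
 * target, each narrower block redefines the macros again, so every least
 * type resolves to the narrowest exact-width type of at least that width.
 */

/*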
+ */ + +#ifdef __INT64_TYPE__ +# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/ +typedef signed __INT64_TYPE__ int64_t; +# endif /* __int8_t_defined */ +typedef unsigned __INT64_TYPE__ uint64_t; +# define __int_least64_t int64_t +# define __uint_least64_t uint64_t +# define __int_least32_t int64_t +# define __uint_least32_t uint64_t +# define __int_least16_t int64_t +# define __uint_least16_t uint64_t +# define __int_least8_t int64_t +# define __uint_least8_t uint64_t +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +typedef __int_least64_t int_least64_t; +typedef __uint_least64_t uint_least64_t; +typedef __int_least64_t int_fast64_t; +typedef __uint_least64_t uint_fast64_t; +#endif /* __int_least64_t */ + +#ifdef __INT56_TYPE__ +typedef signed __INT56_TYPE__ int56_t; +typedef unsigned __INT56_TYPE__ uint56_t; +typedef int56_t int_least56_t; +typedef uint56_t uint_least56_t; +typedef int56_t int_fast56_t; +typedef uint56_t uint_fast56_t; +# define __int_least32_t int56_t +# define __uint_least32_t uint56_t +# define __int_least16_t int56_t +# define __uint_least16_t uint56_t +# define __int_least8_t int56_t +# define __uint_least8_t uint56_t +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +typedef signed __INT48_TYPE__ int48_t; +typedef unsigned __INT48_TYPE__ uint48_t; +typedef int48_t int_least48_t; +typedef uint48_t uint_least48_t; +typedef int48_t int_fast48_t; +typedef uint48_t uint_fast48_t; +# define __int_least32_t int48_t +# define __uint_least32_t uint48_t +# define __int_least16_t int48_t +# define __uint_least16_t uint48_t +# define __int_least8_t int48_t +# define __uint_least8_t uint48_t +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +typedef signed __INT40_TYPE__ int40_t; +typedef unsigned __INT40_TYPE__ uint40_t; +typedef int40_t int_least40_t; +typedef uint40_t uint_least40_t; +typedef int40_t int_fast40_t; +typedef uint40_t uint_fast40_t; +# define __int_least32_t int40_t +# define __uint_least32_t uint40_t +# define __int_least16_t int40_t +# define __uint_least16_t uint40_t +# define __int_least8_t int40_t +# define __uint_least8_t uint40_t +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ + +# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/ +typedef signed __INT32_TYPE__ int32_t; +# endif /* __int8_t_defined */ + +# ifndef __uint32_t_defined /* more glibc compatibility */ +# define __uint32_t_defined +typedef unsigned __INT32_TYPE__ uint32_t; +# endif /* __uint32_t_defined */ + +# define __int_least32_t int32_t +# define __uint_least32_t uint32_t +# define __int_least16_t int32_t +# define __uint_least16_t uint32_t +# define __int_least8_t int32_t +# define __uint_least8_t uint32_t +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +typedef __int_least32_t int_least32_t; +typedef __uint_least32_t uint_least32_t; +typedef __int_least32_t int_fast32_t; +typedef __uint_least32_t uint_fast32_t; +#endif /* __int_least32_t */ + +#ifdef __INT24_TYPE__ +typedef signed __INT24_TYPE__ int24_t; +typedef unsigned __INT24_TYPE__ uint24_t; +typedef int24_t int_least24_t; +typedef uint24_t uint_least24_t; +typedef int24_t int_fast24_t; +typedef uint24_t uint_fast24_t; +# define __int_least16_t int24_t +# define __uint_least16_t uint24_t +# define __int_least8_t int24_t +# define __uint_least8_t uint24_t +#endif /* __INT24_TYPE__ */ + +#ifdef __INT16_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/ +typedef signed __INT16_TYPE__ int16_t; +#endif /* __int8_t_defined */ +typedef 
unsigned __INT16_TYPE__ uint16_t; +# define __int_least16_t int16_t +# define __uint_least16_t uint16_t +# define __int_least8_t int16_t +# define __uint_least8_t uint16_t +#endif /* __INT16_TYPE__ */ + +#ifdef __int_least16_t +typedef __int_least16_t int_least16_t; +typedef __uint_least16_t uint_least16_t; +typedef __int_least16_t int_fast16_t; +typedef __uint_least16_t uint_fast16_t; +#endif /* __int_least16_t */ + + +#ifdef __INT8_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/ +typedef signed __INT8_TYPE__ int8_t; +#endif /* __int8_t_defined */ +typedef unsigned __INT8_TYPE__ uint8_t; +# define __int_least8_t int8_t +# define __uint_least8_t uint8_t +#endif /* __INT8_TYPE__ */ + +#ifdef __int_least8_t +typedef __int_least8_t int_least8_t; +typedef __uint_least8_t uint_least8_t; +typedef __int_least8_t int_fast8_t; +typedef __uint_least8_t uint_fast8_t; +#endif /* __int_least8_t */ + +/* prevent glibc sys/types.h from defining conflicting types */ +#ifndef __int8_t_defined +# define __int8_t_defined +#endif /* __int8_t_defined */ + +/* C99 7.18.1.4 Integer types capable of holding object pointers. + */ +#define __stdint_join3(a,b,c) a ## b ## c + +#define __intn_t(n) __stdint_join3( int, n, _t) +#define __uintn_t(n) __stdint_join3(uint, n, _t) + +#ifndef _INTPTR_T +#ifndef __intptr_t_defined +typedef __intn_t(__INTPTR_WIDTH__) intptr_t; +#define __intptr_t_defined +#define _INTPTR_T +#endif +#endif + +#ifndef _UINTPTR_T +typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t; +#define _UINTPTR_T +#endif + +/* C99 7.18.1.5 Greatest-width integer types. + */ +typedef __intn_t(__INTMAX_WIDTH__) intmax_t; +typedef __uintn_t(__INTMAX_WIDTH__) uintmax_t; + +/* C99 7.18.4 Macros for minimum-width integer constants. + * + * The standard requires that integer constant macros be defined for all the + * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width + * types are required, the corresponding integer constant macros are defined + * here. This implementation also defines minimum-width types for every other + * integer width that the target implements, so corresponding macros are + * defined below, too. + * + * These macros are defined using the same successive-shrinking approach as + * the type definitions above. It is likewise important that macros are defined + * in order of descending width. + * + * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the + * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). 
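*/

/* Editorial aside -- not part of the original header: a worked expansion of
 * the constant macros built below, assuming a typical LP64 target where
 * __INT64_C_SUFFIX__ is LL and the narrower suffix macros end up undefined:
 *
 *   INT64_C(123) -> __int_c(123, __int64_c_suffix)
 *                -> __int_c_join(123, LL) -> 123LL
 *   UINT64_C(9)  -> __uint_c(9, __int64_c_suffix) -> 9ULL
 *   INT16_C(123) -> 123   (no suffix: plain int is already wide enough)
 *
 * The same descending-width trick used for the typedefs decides which
 * suffix, if any, each width inherits.
 */

/*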
+ */ + +#define __int_c_join(a, b) a ## b +#define __int_c(v, suffix) __int_c_join(v, suffix) +#define __uint_c(v, suffix) __int_c_join(v##U, suffix) + + +#ifdef __INT64_TYPE__ +# ifdef __INT64_C_SUFFIX__ +# define __int64_c_suffix __INT64_C_SUFFIX__ +# define __int32_c_suffix __INT64_C_SUFFIX__ +# define __int16_c_suffix __INT64_C_SUFFIX__ +# define __int8_c_suffix __INT64_C_SUFFIX__ +# else +# undef __int64_c_suffix +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT64_C_SUFFIX__ */ +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +# ifdef __int64_c_suffix +# define INT64_C(v) __int_c(v, __int64_c_suffix) +# define UINT64_C(v) __uint_c(v, __int64_c_suffix) +# else +# define INT64_C(v) v +# define UINT64_C(v) v ## U +# endif /* __int64_c_suffix */ +#endif /* __int_least64_t */ + + +#ifdef __INT56_TYPE__ +# ifdef __INT56_C_SUFFIX__ +# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__) +# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__) +# define __int32_c_suffix __INT56_C_SUFFIX__ +# define __int16_c_suffix __INT56_C_SUFFIX__ +# define __int8_c_suffix __INT56_C_SUFFIX__ +# else +# define INT56_C(v) v +# define UINT56_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT56_C_SUFFIX__ */ +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +# ifdef __INT48_C_SUFFIX__ +# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__) +# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__) +# define __int32_c_suffix __INT48_C_SUFFIX__ +# define __int16_c_suffix __INT48_C_SUFFIX__ +# define __int8_c_suffix __INT48_C_SUFFIX__ +# else +# define INT48_C(v) v +# define UINT48_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT48_C_SUFFIX__ */ +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +# ifdef __INT40_C_SUFFIX__ +# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__) +# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__) +# define __int32_c_suffix __INT40_C_SUFFIX__ +# define __int16_c_suffix __INT40_C_SUFFIX__ +# define __int8_c_suffix __INT40_C_SUFFIX__ +# else +# define INT40_C(v) v +# define UINT40_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT40_C_SUFFIX__ */ +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ +# ifdef __INT32_C_SUFFIX__ +# define __int32_c_suffix __INT32_C_SUFFIX__ +# define __int16_c_suffix __INT32_C_SUFFIX__ +# define __int8_c_suffix __INT32_C_SUFFIX__ +#else +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT32_C_SUFFIX__ */ +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +# ifdef __int32_c_suffix +# define INT32_C(v) __int_c(v, __int32_c_suffix) +# define UINT32_C(v) __uint_c(v, __int32_c_suffix) +# else +# define INT32_C(v) v +# define UINT32_C(v) v ## U +# endif /* __int32_c_suffix */ +#endif /* __int_least32_t */ + + +#ifdef __INT24_TYPE__ +# ifdef __INT24_C_SUFFIX__ +# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__) +# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__) +# define __int16_c_suffix __INT24_C_SUFFIX__ +# define __int8_c_suffix __INT24_C_SUFFIX__ +# else +# define INT24_C(v) v +# define UINT24_C(v) v ## U +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT24_C_SUFFIX__ */ +#endif /* __INT24_TYPE__ */ + + +#ifdef __INT16_TYPE__ +# ifdef __INT16_C_SUFFIX__ +# define __int16_c_suffix __INT16_C_SUFFIX__ +# define __int8_c_suffix __INT16_C_SUFFIX__ +#else +# undef __int16_c_suffix +# 
undef __int8_c_suffix +# endif /* __INT16_C_SUFFIX__ */ +#endif /* __INT16_TYPE__ */ + +#ifdef __int_least16_t +# ifdef __int16_c_suffix +# define INT16_C(v) __int_c(v, __int16_c_suffix) +# define UINT16_C(v) __uint_c(v, __int16_c_suffix) +# else +# define INT16_C(v) v +# define UINT16_C(v) v ## U +# endif /* __int16_c_suffix */ +#endif /* __int_least16_t */ + + +#ifdef __INT8_TYPE__ +# ifdef __INT8_C_SUFFIX__ +# define __int8_c_suffix __INT8_C_SUFFIX__ +#else +# undef __int8_c_suffix +# endif /* __INT8_C_SUFFIX__ */ +#endif /* __INT8_TYPE__ */ + +#ifdef __int_least8_t +# ifdef __int8_c_suffix +# define INT8_C(v) __int_c(v, __int8_c_suffix) +# define UINT8_C(v) __uint_c(v, __int8_c_suffix) +# else +# define INT8_C(v) v +# define UINT8_C(v) v ## U +# endif /* __int8_c_suffix */ +#endif /* __int_least8_t */ + + +/* C99 7.18.2.1 Limits of exact-width integer types. + * C99 7.18.2.2 Limits of minimum-width integer types. + * C99 7.18.2.3 Limits of fastest minimum-width integer types. + * + * The presence of limit macros is completely optional in C99. This + * implementation defines limits for all of the types (exact- and + * minimum-width) that it defines above, using the limits of the minimum-width + * type for any types that do not have exact-width representations. + * + * As in the type definitions, this section takes an approach of + * successive-shrinking to determine which limits to use for the standard (8, + * 16, 32, 64) bit widths when they don't have exact representations. It is + * therefore important that the definitions be kept in order of descending + * widths. + * + * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the + * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). + */ + +#ifdef __INT64_TYPE__ +# define INT64_MAX INT64_C( 9223372036854775807) +# define INT64_MIN (-INT64_C( 9223372036854775807)-1) +# define UINT64_MAX UINT64_C(18446744073709551615) +# define __INT_LEAST64_MIN INT64_MIN +# define __INT_LEAST64_MAX INT64_MAX +# define __UINT_LEAST64_MAX UINT64_MAX +# define __INT_LEAST32_MIN INT64_MIN +# define __INT_LEAST32_MAX INT64_MAX +# define __UINT_LEAST32_MAX UINT64_MAX +# define __INT_LEAST16_MIN INT64_MIN +# define __INT_LEAST16_MAX INT64_MAX +# define __UINT_LEAST16_MAX UINT64_MAX +# define __INT_LEAST8_MIN INT64_MIN +# define __INT_LEAST8_MAX INT64_MAX +# define __UINT_LEAST8_MAX UINT64_MAX +#endif /* __INT64_TYPE__ */ + +#ifdef __INT_LEAST64_MIN +# define INT_LEAST64_MIN __INT_LEAST64_MIN +# define INT_LEAST64_MAX __INT_LEAST64_MAX +# define UINT_LEAST64_MAX __UINT_LEAST64_MAX +# define INT_FAST64_MIN __INT_LEAST64_MIN +# define INT_FAST64_MAX __INT_LEAST64_MAX +# define UINT_FAST64_MAX __UINT_LEAST64_MAX +#endif /* __INT_LEAST64_MIN */ + + +#ifdef __INT56_TYPE__ +# define INT56_MAX INT56_C(36028797018963967) +# define INT56_MIN (-INT56_C(36028797018963967)-1) +# define UINT56_MAX UINT56_C(72057594037927935) +# define INT_LEAST56_MIN INT56_MIN +# define INT_LEAST56_MAX INT56_MAX +# define UINT_LEAST56_MAX UINT56_MAX +# define INT_FAST56_MIN INT56_MIN +# define INT_FAST56_MAX INT56_MAX +# define UINT_FAST56_MAX UINT56_MAX +# define __INT_LEAST32_MIN INT56_MIN +# define __INT_LEAST32_MAX INT56_MAX +# define __UINT_LEAST32_MAX UINT56_MAX +# define __INT_LEAST16_MIN INT56_MIN +# define __INT_LEAST16_MAX INT56_MAX +# define __UINT_LEAST16_MAX UINT56_MAX +# define __INT_LEAST8_MIN INT56_MIN +# define __INT_LEAST8_MAX INT56_MAX +# define __UINT_LEAST8_MAX UINT56_MAX +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +# define 
INT48_MAX INT48_C(140737488355327) +# define INT48_MIN (-INT48_C(140737488355327)-1) +# define UINT48_MAX UINT48_C(281474976710655) +# define INT_LEAST48_MIN INT48_MIN +# define INT_LEAST48_MAX INT48_MAX +# define UINT_LEAST48_MAX UINT48_MAX +# define INT_FAST48_MIN INT48_MIN +# define INT_FAST48_MAX INT48_MAX +# define UINT_FAST48_MAX UINT48_MAX +# define __INT_LEAST32_MIN INT48_MIN +# define __INT_LEAST32_MAX INT48_MAX +# define __UINT_LEAST32_MAX UINT48_MAX +# define __INT_LEAST16_MIN INT48_MIN +# define __INT_LEAST16_MAX INT48_MAX +# define __UINT_LEAST16_MAX UINT48_MAX +# define __INT_LEAST8_MIN INT48_MIN +# define __INT_LEAST8_MAX INT48_MAX +# define __UINT_LEAST8_MAX UINT48_MAX +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +# define INT40_MAX INT40_C(549755813887) +# define INT40_MIN (-INT40_C(549755813887)-1) +# define UINT40_MAX UINT40_C(1099511627775) +# define INT_LEAST40_MIN INT40_MIN +# define INT_LEAST40_MAX INT40_MAX +# define UINT_LEAST40_MAX UINT40_MAX +# define INT_FAST40_MIN INT40_MIN +# define INT_FAST40_MAX INT40_MAX +# define UINT_FAST40_MAX UINT40_MAX +# define __INT_LEAST32_MIN INT40_MIN +# define __INT_LEAST32_MAX INT40_MAX +# define __UINT_LEAST32_MAX UINT40_MAX +# define __INT_LEAST16_MIN INT40_MIN +# define __INT_LEAST16_MAX INT40_MAX +# define __UINT_LEAST16_MAX UINT40_MAX +# define __INT_LEAST8_MIN INT40_MIN +# define __INT_LEAST8_MAX INT40_MAX +# define __UINT_LEAST8_MAX UINT40_MAX +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ +# define INT32_MAX INT32_C(2147483647) +# define INT32_MIN (-INT32_C(2147483647)-1) +# define UINT32_MAX UINT32_C(4294967295) +# define __INT_LEAST32_MIN INT32_MIN +# define __INT_LEAST32_MAX INT32_MAX +# define __UINT_LEAST32_MAX UINT32_MAX +# define __INT_LEAST16_MIN INT32_MIN +# define __INT_LEAST16_MAX INT32_MAX +# define __UINT_LEAST16_MAX UINT32_MAX +# define __INT_LEAST8_MIN INT32_MIN +# define __INT_LEAST8_MAX INT32_MAX +# define __UINT_LEAST8_MAX UINT32_MAX +#endif /* __INT32_TYPE__ */ + +#ifdef __INT_LEAST32_MIN +# define INT_LEAST32_MIN __INT_LEAST32_MIN +# define INT_LEAST32_MAX __INT_LEAST32_MAX +# define UINT_LEAST32_MAX __UINT_LEAST32_MAX +# define INT_FAST32_MIN __INT_LEAST32_MIN +# define INT_FAST32_MAX __INT_LEAST32_MAX +# define UINT_FAST32_MAX __UINT_LEAST32_MAX +#endif /* __INT_LEAST32_MIN */ + + +#ifdef __INT24_TYPE__ +# define INT24_MAX INT24_C(8388607) +# define INT24_MIN (-INT24_C(8388607)-1) +# define UINT24_MAX UINT24_C(16777215) +# define INT_LEAST24_MIN INT24_MIN +# define INT_LEAST24_MAX INT24_MAX +# define UINT_LEAST24_MAX UINT24_MAX +# define INT_FAST24_MIN INT24_MIN +# define INT_FAST24_MAX INT24_MAX +# define UINT_FAST24_MAX UINT24_MAX +# define __INT_LEAST16_MIN INT24_MIN +# define __INT_LEAST16_MAX INT24_MAX +# define __UINT_LEAST16_MAX UINT24_MAX +# define __INT_LEAST8_MIN INT24_MIN +# define __INT_LEAST8_MAX INT24_MAX +# define __UINT_LEAST8_MAX UINT24_MAX +#endif /* __INT24_TYPE__ */ + + +#ifdef __INT16_TYPE__ +#define INT16_MAX INT16_C(32767) +#define INT16_MIN (-INT16_C(32767)-1) +#define UINT16_MAX UINT16_C(65535) +# define __INT_LEAST16_MIN INT16_MIN +# define __INT_LEAST16_MAX INT16_MAX +# define __UINT_LEAST16_MAX UINT16_MAX +# define __INT_LEAST8_MIN INT16_MIN +# define __INT_LEAST8_MAX INT16_MAX +# define __UINT_LEAST8_MAX UINT16_MAX +#endif /* __INT16_TYPE__ */ + +#ifdef __INT_LEAST16_MIN +# define INT_LEAST16_MIN __INT_LEAST16_MIN +# define INT_LEAST16_MAX __INT_LEAST16_MAX +# define UINT_LEAST16_MAX __UINT_LEAST16_MAX +# define INT_FAST16_MIN __INT_LEAST16_MIN +# 
define INT_FAST16_MAX __INT_LEAST16_MAX +# define UINT_FAST16_MAX __UINT_LEAST16_MAX +#endif /* __INT_LEAST16_MIN */ + + +#ifdef __INT8_TYPE__ +# define INT8_MAX INT8_C(127) +# define INT8_MIN (-INT8_C(127)-1) +# define UINT8_MAX UINT8_C(255) +# define __INT_LEAST8_MIN INT8_MIN +# define __INT_LEAST8_MAX INT8_MAX +# define __UINT_LEAST8_MAX UINT8_MAX +#endif /* __INT8_TYPE__ */ + +#ifdef __INT_LEAST8_MIN +# define INT_LEAST8_MIN __INT_LEAST8_MIN +# define INT_LEAST8_MAX __INT_LEAST8_MAX +# define UINT_LEAST8_MAX __UINT_LEAST8_MAX +# define INT_FAST8_MIN __INT_LEAST8_MIN +# define INT_FAST8_MAX __INT_LEAST8_MAX +# define UINT_FAST8_MAX __UINT_LEAST8_MAX +#endif /* __INT_LEAST8_MIN */ + +/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ +/* C99 7.18.3 Limits of other integer types. */ +#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) +#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) +#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) + +#define INTPTR_MIN __INTN_MIN(__INTPTR_WIDTH__) +#define INTPTR_MAX __INTN_MAX(__INTPTR_WIDTH__) +#define UINTPTR_MAX __UINTN_MAX(__INTPTR_WIDTH__) +#define PTRDIFF_MIN __INTN_MIN(__PTRDIFF_WIDTH__) +#define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__) +#define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__) + +/* C99 7.18.2.5 Limits of greatest-width integer types. */ +#define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__) +#define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__) +#define UINTMAX_MAX __UINTN_MAX(__INTMAX_WIDTH__) + +/* C99 7.18.3 Limits of other integer types. */ +#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) +#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) +#define WINT_MIN __INTN_MIN(__WINT_WIDTH__) +#define WINT_MAX __INTN_MAX(__WINT_WIDTH__) + +/* FIXME: if we ever support a target with unsigned wchar_t, this should be + * 0 .. Max. + */ +#ifndef WCHAR_MAX +#define WCHAR_MAX __INTN_MAX(__WCHAR_WIDTH__) +#endif +#ifndef WCHAR_MIN +#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) +#endif + +/* 7.18.4.2 Macros for greatest-width integer constants. */ +#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) +#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) + +#define INTMAX_C(v) __INTN_C(__INTMAX_WIDTH__, v) +#define UINTMAX_C(v) __UINTN_C(__INTMAX_WIDTH__, v) + +#endif /* __STDC_HOSTED__ */ +#endif /* __CLANG_STDINT_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/tgmath.h b/contrib/llvm/tools/clang/lib/Headers/tgmath.h new file mode 100644 index 0000000..e1a0023 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/tgmath.h @@ -0,0 +1,1358 @@ +/*===---- tgmath.h - Standard header for type generic math ----------------===*\ + * + * Copyright (c) 2009 Howard Hinnant + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __TGMATH_H +#define __TGMATH_H + +/* C99 7.22 Type-generic math <tgmath.h>. */ +#include <math.h> + +/* C++ handles type genericity with overloading in math.h. */ +#ifndef __cplusplus +#include <complex.h> + +#define _TG_ATTRSp __attribute__((__overloadable__)) +#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__)) + +// promotion + +typedef void _Argument_type_is_not_arithmetic; +static _Argument_type_is_not_arithmetic __tg_promote(...) + __attribute__((__unavailable__,__overloadable__)); +static double _TG_ATTRSp __tg_promote(int); +static double _TG_ATTRSp __tg_promote(unsigned int); +static double _TG_ATTRSp __tg_promote(long); +static double _TG_ATTRSp __tg_promote(unsigned long); +static double _TG_ATTRSp __tg_promote(long long); +static double _TG_ATTRSp __tg_promote(unsigned long long); +static float _TG_ATTRSp __tg_promote(float); +static double _TG_ATTRSp __tg_promote(double); +static long double _TG_ATTRSp __tg_promote(long double); +static float _Complex _TG_ATTRSp __tg_promote(float _Complex); +static double _Complex _TG_ATTRSp __tg_promote(double _Complex); +static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex); + +#define __tg_promote1(__x) (__typeof__(__tg_promote(__x))) +#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \ + __tg_promote(__y))) +#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \ + __tg_promote(__y) + \ + __tg_promote(__z))) + +// acos + +static float + _TG_ATTRS + __tg_acos(float __x) {return acosf(__x);} + +static double + _TG_ATTRS + __tg_acos(double __x) {return acos(__x);} + +static long double + _TG_ATTRS + __tg_acos(long double __x) {return acosl(__x);} + +static float _Complex + _TG_ATTRS + __tg_acos(float _Complex __x) {return cacosf(__x);} + +static double _Complex + _TG_ATTRS + __tg_acos(double _Complex __x) {return cacos(__x);} + +static long double _Complex + _TG_ATTRS + __tg_acos(long double _Complex __x) {return cacosl(__x);} + +#undef acos +#define acos(__x) __tg_acos(__tg_promote1((__x))(__x)) + +// asin + +static float + _TG_ATTRS + __tg_asin(float __x) {return asinf(__x);} + +static double + _TG_ATTRS + __tg_asin(double __x) {return asin(__x);} + +static long double + _TG_ATTRS + __tg_asin(long double __x) {return asinl(__x);} + +static float _Complex + _TG_ATTRS + __tg_asin(float _Complex __x) {return casinf(__x);} + +static double _Complex + _TG_ATTRS + __tg_asin(double _Complex __x) {return casin(__x);} + +static long double _Complex + _TG_ATTRS + __tg_asin(long double _Complex __x) {return casinl(__x);} + +#undef asin +#define asin(__x) __tg_asin(__tg_promote1((__x))(__x)) + +// atan + +static float + _TG_ATTRS + __tg_atan(float __x) {return atanf(__x);} + +static double + _TG_ATTRS + __tg_atan(double __x) {return atan(__x);} + +static long double + _TG_ATTRS + __tg_atan(long double __x) {return atanl(__x);} + +static float _Complex + _TG_ATTRS + __tg_atan(float _Complex __x) {return catanf(__x);} + +static double _Complex + _TG_ATTRS + __tg_atan(double _Complex __x) {return catan(__x);} + +static long double _Complex + _TG_ATTRS + __tg_atan(long double _Complex __x) {return catanl(__x);} + +#undef 
atan +#define atan(__x) __tg_atan(__tg_promote1((__x))(__x)) + +// acosh + +static float + _TG_ATTRS + __tg_acosh(float __x) {return acoshf(__x);} + +static double + _TG_ATTRS + __tg_acosh(double __x) {return acosh(__x);} + +static long double + _TG_ATTRS + __tg_acosh(long double __x) {return acoshl(__x);} + +static float _Complex + _TG_ATTRS + __tg_acosh(float _Complex __x) {return cacoshf(__x);} + +static double _Complex + _TG_ATTRS + __tg_acosh(double _Complex __x) {return cacosh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_acosh(long double _Complex __x) {return cacoshl(__x);} + +#undef acosh +#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x)) + +// asinh + +static float + _TG_ATTRS + __tg_asinh(float __x) {return asinhf(__x);} + +static double + _TG_ATTRS + __tg_asinh(double __x) {return asinh(__x);} + +static long double + _TG_ATTRS + __tg_asinh(long double __x) {return asinhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_asinh(float _Complex __x) {return casinhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_asinh(double _Complex __x) {return casinh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_asinh(long double _Complex __x) {return casinhl(__x);} + +#undef asinh +#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x)) + +// atanh + +static float + _TG_ATTRS + __tg_atanh(float __x) {return atanhf(__x);} + +static double + _TG_ATTRS + __tg_atanh(double __x) {return atanh(__x);} + +static long double + _TG_ATTRS + __tg_atanh(long double __x) {return atanhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_atanh(float _Complex __x) {return catanhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_atanh(double _Complex __x) {return catanh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_atanh(long double _Complex __x) {return catanhl(__x);} + +#undef atanh +#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x)) + +// cos + +static float + _TG_ATTRS + __tg_cos(float __x) {return cosf(__x);} + +static double + _TG_ATTRS + __tg_cos(double __x) {return cos(__x);} + +static long double + _TG_ATTRS + __tg_cos(long double __x) {return cosl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cos(float _Complex __x) {return ccosf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cos(double _Complex __x) {return ccos(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cos(long double _Complex __x) {return ccosl(__x);} + +#undef cos +#define cos(__x) __tg_cos(__tg_promote1((__x))(__x)) + +// sin + +static float + _TG_ATTRS + __tg_sin(float __x) {return sinf(__x);} + +static double + _TG_ATTRS + __tg_sin(double __x) {return sin(__x);} + +static long double + _TG_ATTRS + __tg_sin(long double __x) {return sinl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sin(float _Complex __x) {return csinf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sin(double _Complex __x) {return csin(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sin(long double _Complex __x) {return csinl(__x);} + +#undef sin +#define sin(__x) __tg_sin(__tg_promote1((__x))(__x)) + +// tan + +static float + _TG_ATTRS + __tg_tan(float __x) {return tanf(__x);} + +static double + _TG_ATTRS + __tg_tan(double __x) {return tan(__x);} + +static long double + _TG_ATTRS + __tg_tan(long double __x) {return tanl(__x);} + +static float _Complex + _TG_ATTRS + __tg_tan(float _Complex __x) {return ctanf(__x);} + +static double _Complex + _TG_ATTRS + __tg_tan(double _Complex __x) {return ctan(__x);} + +static long double _Complex + _TG_ATTRS + __tg_tan(long double _Complex __x) 
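/* Annotation, not part of the imported header: every type-generic macro in
 * this file follows the pattern seen above.  The __tg_promote overloads
 * compute the promoted type, and the macro casts the argument to that type
 * before dispatching to the matching __tg_ overload.  For example, with this
 * header included:
 *
 *     sin(1.0f)           // expands to __tg_sin(__tg_promote1((1.0f))(1.0f));
 *                         // the argument stays float, so it calls sinf()
 *     sin(1)              // int promotes to double, so it calls sin()
 *     sin(1.0f + 2.0f*I)  // float _Complex, so it calls csinf()
 *
 * The cast (__typeof__(__tg_promote(__x)))(__x) is what routes an integer
 * argument to the double overload instead of failing to match. */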
{return ctanl(__x);} + +#undef tan +#define tan(__x) __tg_tan(__tg_promote1((__x))(__x)) + +// cosh + +static float + _TG_ATTRS + __tg_cosh(float __x) {return coshf(__x);} + +static double + _TG_ATTRS + __tg_cosh(double __x) {return cosh(__x);} + +static long double + _TG_ATTRS + __tg_cosh(long double __x) {return coshl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cosh(float _Complex __x) {return ccoshf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cosh(double _Complex __x) {return ccosh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cosh(long double _Complex __x) {return ccoshl(__x);} + +#undef cosh +#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x)) + +// sinh + +static float + _TG_ATTRS + __tg_sinh(float __x) {return sinhf(__x);} + +static double + _TG_ATTRS + __tg_sinh(double __x) {return sinh(__x);} + +static long double + _TG_ATTRS + __tg_sinh(long double __x) {return sinhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sinh(float _Complex __x) {return csinhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sinh(double _Complex __x) {return csinh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sinh(long double _Complex __x) {return csinhl(__x);} + +#undef sinh +#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x)) + +// tanh + +static float + _TG_ATTRS + __tg_tanh(float __x) {return tanhf(__x);} + +static double + _TG_ATTRS + __tg_tanh(double __x) {return tanh(__x);} + +static long double + _TG_ATTRS + __tg_tanh(long double __x) {return tanhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_tanh(float _Complex __x) {return ctanhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_tanh(double _Complex __x) {return ctanh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_tanh(long double _Complex __x) {return ctanhl(__x);} + +#undef tanh +#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x)) + +// exp + +static float + _TG_ATTRS + __tg_exp(float __x) {return expf(__x);} + +static double + _TG_ATTRS + __tg_exp(double __x) {return exp(__x);} + +static long double + _TG_ATTRS + __tg_exp(long double __x) {return expl(__x);} + +static float _Complex + _TG_ATTRS + __tg_exp(float _Complex __x) {return cexpf(__x);} + +static double _Complex + _TG_ATTRS + __tg_exp(double _Complex __x) {return cexp(__x);} + +static long double _Complex + _TG_ATTRS + __tg_exp(long double _Complex __x) {return cexpl(__x);} + +#undef exp +#define exp(__x) __tg_exp(__tg_promote1((__x))(__x)) + +// log + +static float + _TG_ATTRS + __tg_log(float __x) {return logf(__x);} + +static double + _TG_ATTRS + __tg_log(double __x) {return log(__x);} + +static long double + _TG_ATTRS + __tg_log(long double __x) {return logl(__x);} + +static float _Complex + _TG_ATTRS + __tg_log(float _Complex __x) {return clogf(__x);} + +static double _Complex + _TG_ATTRS + __tg_log(double _Complex __x) {return clog(__x);} + +static long double _Complex + _TG_ATTRS + __tg_log(long double _Complex __x) {return clogl(__x);} + +#undef log +#define log(__x) __tg_log(__tg_promote1((__x))(__x)) + +// pow + +static float + _TG_ATTRS + __tg_pow(float __x, float __y) {return powf(__x, __y);} + +static double + _TG_ATTRS + __tg_pow(double __x, double __y) {return pow(__x, __y);} + +static long double + _TG_ATTRS + __tg_pow(long double __x, long double __y) {return powl(__x, __y);} + +static float _Complex + _TG_ATTRS + __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);} + +static double _Complex + _TG_ATTRS + __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, 
__y);} + +static long double _Complex + _TG_ATTRS + __tg_pow(long double _Complex __x, long double _Complex __y) + {return cpowl(__x, __y);} + +#undef pow +#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// sqrt + +static float + _TG_ATTRS + __tg_sqrt(float __x) {return sqrtf(__x);} + +static double + _TG_ATTRS + __tg_sqrt(double __x) {return sqrt(__x);} + +static long double + _TG_ATTRS + __tg_sqrt(long double __x) {return sqrtl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sqrt(float _Complex __x) {return csqrtf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sqrt(double _Complex __x) {return csqrt(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sqrt(long double _Complex __x) {return csqrtl(__x);} + +#undef sqrt +#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x)) + +// fabs + +static float + _TG_ATTRS + __tg_fabs(float __x) {return fabsf(__x);} + +static double + _TG_ATTRS + __tg_fabs(double __x) {return fabs(__x);} + +static long double + _TG_ATTRS + __tg_fabs(long double __x) {return fabsl(__x);} + +static float _Complex + _TG_ATTRS + __tg_fabs(float _Complex __x) {return cabsf(__x);} + +static double _Complex + _TG_ATTRS + __tg_fabs(double _Complex __x) {return cabs(__x);} + +static long double _Complex + _TG_ATTRS + __tg_fabs(long double _Complex __x) {return cabsl(__x);} + +#undef fabs +#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x)) + +// atan2 + +static float + _TG_ATTRS + __tg_atan2(float __x, float __y) {return atan2f(__x, __y);} + +static double + _TG_ATTRS + __tg_atan2(double __x, double __y) {return atan2(__x, __y);} + +static long double + _TG_ATTRS + __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);} + +#undef atan2 +#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// cbrt + +static float + _TG_ATTRS + __tg_cbrt(float __x) {return cbrtf(__x);} + +static double + _TG_ATTRS + __tg_cbrt(double __x) {return cbrt(__x);} + +static long double + _TG_ATTRS + __tg_cbrt(long double __x) {return cbrtl(__x);} + +#undef cbrt +#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x)) + +// ceil + +static float + _TG_ATTRS + __tg_ceil(float __x) {return ceilf(__x);} + +static double + _TG_ATTRS + __tg_ceil(double __x) {return ceil(__x);} + +static long double + _TG_ATTRS + __tg_ceil(long double __x) {return ceill(__x);} + +#undef ceil +#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x)) + +// copysign + +static float + _TG_ATTRS + __tg_copysign(float __x, float __y) {return copysignf(__x, __y);} + +static double + _TG_ATTRS + __tg_copysign(double __x, double __y) {return copysign(__x, __y);} + +static long double + _TG_ATTRS + __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);} + +#undef copysign +#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// erf + +static float + _TG_ATTRS + __tg_erf(float __x) {return erff(__x);} + +static double + _TG_ATTRS + __tg_erf(double __x) {return erf(__x);} + +static long double + _TG_ATTRS + __tg_erf(long double __x) {return erfl(__x);} + +#undef erf +#define erf(__x) __tg_erf(__tg_promote1((__x))(__x)) + +// erfc + +static float + _TG_ATTRS + __tg_erfc(float __x) {return erfcf(__x);} + +static double + _TG_ATTRS + __tg_erfc(double __x) {return erfc(__x);} + +static long double + _TG_ATTRS + __tg_erfc(long double __x) {return erfcl(__x);} + +#undef erfc +#define erfc(__x) 
__tg_erfc(__tg_promote1((__x))(__x)) + +// exp2 + +static float + _TG_ATTRS + __tg_exp2(float __x) {return exp2f(__x);} + +static double + _TG_ATTRS + __tg_exp2(double __x) {return exp2(__x);} + +static long double + _TG_ATTRS + __tg_exp2(long double __x) {return exp2l(__x);} + +#undef exp2 +#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x)) + +// expm1 + +static float + _TG_ATTRS + __tg_expm1(float __x) {return expm1f(__x);} + +static double + _TG_ATTRS + __tg_expm1(double __x) {return expm1(__x);} + +static long double + _TG_ATTRS + __tg_expm1(long double __x) {return expm1l(__x);} + +#undef expm1 +#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x)) + +// fdim + +static float + _TG_ATTRS + __tg_fdim(float __x, float __y) {return fdimf(__x, __y);} + +static double + _TG_ATTRS + __tg_fdim(double __x, double __y) {return fdim(__x, __y);} + +static long double + _TG_ATTRS + __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);} + +#undef fdim +#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// floor + +static float + _TG_ATTRS + __tg_floor(float __x) {return floorf(__x);} + +static double + _TG_ATTRS + __tg_floor(double __x) {return floor(__x);} + +static long double + _TG_ATTRS + __tg_floor(long double __x) {return floorl(__x);} + +#undef floor +#define floor(__x) __tg_floor(__tg_promote1((__x))(__x)) + +// fma + +static float + _TG_ATTRS + __tg_fma(float __x, float __y, float __z) + {return fmaf(__x, __y, __z);} + +static double + _TG_ATTRS + __tg_fma(double __x, double __y, double __z) + {return fma(__x, __y, __z);} + +static long double + _TG_ATTRS + __tg_fma(long double __x,long double __y, long double __z) + {return fmal(__x, __y, __z);} + +#undef fma +#define fma(__x, __y, __z) \ + __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \ + __tg_promote3((__x), (__y), (__z))(__y), \ + __tg_promote3((__x), (__y), (__z))(__z)) + +// fmax + +static float + _TG_ATTRS + __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmax(double __x, double __y) {return fmax(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);} + +#undef fmax +#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// fmin + +static float + _TG_ATTRS + __tg_fmin(float __x, float __y) {return fminf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmin(double __x, double __y) {return fmin(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);} + +#undef fmin +#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// fmod + +static float + _TG_ATTRS + __tg_fmod(float __x, float __y) {return fmodf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmod(double __x, double __y) {return fmod(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);} + +#undef fmod +#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// frexp + +static float + _TG_ATTRS + __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);} + +static double + _TG_ATTRS + __tg_frexp(double __x, int* __y) {return frexp(__x, __y);} + +static long double + _TG_ATTRS + __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);} + +#undef frexp +#define frexp(__x, __y) 
__tg_frexp(__tg_promote1((__x))(__x), __y) + +// hypot + +static float + _TG_ATTRS + __tg_hypot(float __x, float __y) {return hypotf(__x, __y);} + +static double + _TG_ATTRS + __tg_hypot(double __x, double __y) {return hypot(__x, __y);} + +static long double + _TG_ATTRS + __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);} + +#undef hypot +#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// ilogb + +static int + _TG_ATTRS + __tg_ilogb(float __x) {return ilogbf(__x);} + +static int + _TG_ATTRS + __tg_ilogb(double __x) {return ilogb(__x);} + +static int + _TG_ATTRS + __tg_ilogb(long double __x) {return ilogbl(__x);} + +#undef ilogb +#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x)) + +// ldexp + +static float + _TG_ATTRS + __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);} + +static double + _TG_ATTRS + __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);} + +static long double + _TG_ATTRS + __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);} + +#undef ldexp +#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y) + +// lgamma + +static float + _TG_ATTRS + __tg_lgamma(float __x) {return lgammaf(__x);} + +static double + _TG_ATTRS + __tg_lgamma(double __x) {return lgamma(__x);} + +static long double + _TG_ATTRS + __tg_lgamma(long double __x) {return lgammal(__x);} + +#undef lgamma +#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x)) + +// llrint + +static long long + _TG_ATTRS + __tg_llrint(float __x) {return llrintf(__x);} + +static long long + _TG_ATTRS + __tg_llrint(double __x) {return llrint(__x);} + +static long long + _TG_ATTRS + __tg_llrint(long double __x) {return llrintl(__x);} + +#undef llrint +#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x)) + +// llround + +static long long + _TG_ATTRS + __tg_llround(float __x) {return llroundf(__x);} + +static long long + _TG_ATTRS + __tg_llround(double __x) {return llround(__x);} + +static long long + _TG_ATTRS + __tg_llround(long double __x) {return llroundl(__x);} + +#undef llround +#define llround(__x) __tg_llround(__tg_promote1((__x))(__x)) + +// log10 + +static float + _TG_ATTRS + __tg_log10(float __x) {return log10f(__x);} + +static double + _TG_ATTRS + __tg_log10(double __x) {return log10(__x);} + +static long double + _TG_ATTRS + __tg_log10(long double __x) {return log10l(__x);} + +#undef log10 +#define log10(__x) __tg_log10(__tg_promote1((__x))(__x)) + +// log1p + +static float + _TG_ATTRS + __tg_log1p(float __x) {return log1pf(__x);} + +static double + _TG_ATTRS + __tg_log1p(double __x) {return log1p(__x);} + +static long double + _TG_ATTRS + __tg_log1p(long double __x) {return log1pl(__x);} + +#undef log1p +#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x)) + +// log2 + +static float + _TG_ATTRS + __tg_log2(float __x) {return log2f(__x);} + +static double + _TG_ATTRS + __tg_log2(double __x) {return log2(__x);} + +static long double + _TG_ATTRS + __tg_log2(long double __x) {return log2l(__x);} + +#undef log2 +#define log2(__x) __tg_log2(__tg_promote1((__x))(__x)) + +// lrint + +static long + _TG_ATTRS + __tg_lrint(float __x) {return lrintf(__x);} + +static long + _TG_ATTRS + __tg_lrint(double __x) {return lrint(__x);} + +static long + _TG_ATTRS + __tg_lrint(long double __x) {return lrintl(__x);} + +#undef lrint +#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x)) + +// lround + +static long + _TG_ATTRS + __tg_lround(float __x) {return lroundf(__x);} + +static long + _TG_ATTRS + 
__tg_lround(double __x) {return lround(__x);} + +static long + _TG_ATTRS + __tg_lround(long double __x) {return lroundl(__x);} + +#undef lround +#define lround(__x) __tg_lround(__tg_promote1((__x))(__x)) + +// nearbyint + +static float + _TG_ATTRS + __tg_nearbyint(float __x) {return nearbyintf(__x);} + +static double + _TG_ATTRS + __tg_nearbyint(double __x) {return nearbyint(__x);} + +static long double + _TG_ATTRS + __tg_nearbyint(long double __x) {return nearbyintl(__x);} + +#undef nearbyint +#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x)) + +// nextafter + +static float + _TG_ATTRS + __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);} + +static double + _TG_ATTRS + __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);} + +static long double + _TG_ATTRS + __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);} + +#undef nextafter +#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// nexttoward + +static float + _TG_ATTRS + __tg_nexttoward(float __x, float __y) {return nexttowardf(__x, __y);} + +static double + _TG_ATTRS + __tg_nexttoward(double __x, double __y) {return nexttoward(__x, __y);} + +static long double + _TG_ATTRS + __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);} + +#undef nexttoward +#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// remainder + +static float + _TG_ATTRS + __tg_remainder(float __x, float __y) {return remainderf(__x, __y);} + +static double + _TG_ATTRS + __tg_remainder(double __x, double __y) {return remainder(__x, __y);} + +static long double + _TG_ATTRS + __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);} + +#undef remainder +#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// remquo + +static float + _TG_ATTRS + __tg_remquo(float __x, float __y, int* __z) + {return remquof(__x, __y, __z);} + +static double + _TG_ATTRS + __tg_remquo(double __x, double __y, int* __z) + {return remquo(__x, __y, __z);} + +static long double + _TG_ATTRS + __tg_remquo(long double __x,long double __y, int* __z) + {return remquol(__x, __y, __z);} + +#undef remquo +#define remquo(__x, __y, __z) \ + __tg_remquo(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y), \ + (__z)) + +// rint + +static float + _TG_ATTRS + __tg_rint(float __x) {return rintf(__x);} + +static double + _TG_ATTRS + __tg_rint(double __x) {return rint(__x);} + +static long double + _TG_ATTRS + __tg_rint(long double __x) {return rintl(__x);} + +#undef rint +#define rint(__x) __tg_rint(__tg_promote1((__x))(__x)) + +// round + +static float + _TG_ATTRS + __tg_round(float __x) {return roundf(__x);} + +static double + _TG_ATTRS + __tg_round(double __x) {return round(__x);} + +static long double + _TG_ATTRS + __tg_round(long double __x) {return roundl(__x);} + +#undef round +#define round(__x) __tg_round(__tg_promote1((__x))(__x)) + +// scalbn + +static float + _TG_ATTRS + __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);} + +static double + _TG_ATTRS + __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);} + +static long double + _TG_ATTRS + __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);} + +#undef scalbn +#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y) + +// scalbln + +static float + _TG_ATTRS + 
__tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);} + +static double + _TG_ATTRS + __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);} + +static long double + _TG_ATTRS + __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);} + +#undef scalbln +#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y) + +// tgamma + +static float + _TG_ATTRS + __tg_tgamma(float __x) {return tgammaf(__x);} + +static double + _TG_ATTRS + __tg_tgamma(double __x) {return tgamma(__x);} + +static long double + _TG_ATTRS + __tg_tgamma(long double __x) {return tgammal(__x);} + +#undef tgamma +#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x)) + +// trunc + +static float + _TG_ATTRS + __tg_trunc(float __x) {return truncf(__x);} + +static double + _TG_ATTRS + __tg_trunc(double __x) {return trunc(__x);} + +static long double + _TG_ATTRS + __tg_trunc(long double __x) {return truncl(__x);} + +#undef trunc +#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x)) + +// carg + +static float + _TG_ATTRS + __tg_carg(float __x) {return atan2f(0.F, __x);} + +static double + _TG_ATTRS + __tg_carg(double __x) {return atan2(0., __x);} + +static long double + _TG_ATTRS + __tg_carg(long double __x) {return atan2l(0.L, __x);} + +static float + _TG_ATTRS + __tg_carg(float _Complex __x) {return cargf(__x);} + +static double + _TG_ATTRS + __tg_carg(double _Complex __x) {return carg(__x);} + +static long double + _TG_ATTRS + __tg_carg(long double _Complex __x) {return cargl(__x);} + +#undef carg +#define carg(__x) __tg_carg(__tg_promote1((__x))(__x)) + +// cimag + +static float + _TG_ATTRS + __tg_cimag(float __x) {return 0;} + +static double + _TG_ATTRS + __tg_cimag(double __x) {return 0;} + +static long double + _TG_ATTRS + __tg_cimag(long double __x) {return 0;} + +static float + _TG_ATTRS + __tg_cimag(float _Complex __x) {return cimagf(__x);} + +static double + _TG_ATTRS + __tg_cimag(double _Complex __x) {return cimag(__x);} + +static long double + _TG_ATTRS + __tg_cimag(long double _Complex __x) {return cimagl(__x);} + +#undef cimag +#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x)) + +// conj + +static float _Complex + _TG_ATTRS + __tg_conj(float __x) {return __x;} + +static double _Complex + _TG_ATTRS + __tg_conj(double __x) {return __x;} + +static long double _Complex + _TG_ATTRS + __tg_conj(long double __x) {return __x;} + +static float _Complex + _TG_ATTRS + __tg_conj(float _Complex __x) {return conjf(__x);} + +static double _Complex + _TG_ATTRS + __tg_conj(double _Complex __x) {return conj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_conj(long double _Complex __x) {return conjl(__x);} + +#undef conj +#define conj(__x) __tg_conj(__tg_promote1((__x))(__x)) + +// cproj + +static float _Complex + _TG_ATTRS + __tg_cproj(float __x) {return cprojf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cproj(double __x) {return cproj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cproj(long double __x) {return cprojl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cproj(float _Complex __x) {return cprojf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cproj(double _Complex __x) {return cproj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cproj(long double _Complex __x) {return cprojl(__x);} + +#undef cproj +#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x)) + +// creal + +static float _Complex + _TG_ATTRS + __tg_creal(float __x) {return __x;} + +static double _Complex + _TG_ATTRS + __tg_creal(double __x) {return __x;} + +static 
long double _Complex + _TG_ATTRS + __tg_creal(long double __x) {return __x;} + +static float + _TG_ATTRS + __tg_creal(float _Complex __x) {return crealf(__x);} + +static double + _TG_ATTRS + __tg_creal(double _Complex __x) {return creal(__x);} + +static long double + _TG_ATTRS + __tg_creal(long double _Complex __x) {return creall(__x);} + +#undef creal +#define creal(__x) __tg_creal(__tg_promote1((__x))(__x)) + +#undef _TG_ATTRSp +#undef _TG_ATTRS + +#endif /* __cplusplus */ +#endif /* __TGMATH_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h new file mode 100644 index 0000000..07fea1c --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h @@ -0,0 +1,218 @@ +/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __TMMINTRIN_H +#define __TMMINTRIN_H + +#ifndef __SSSE3__ +#error "SSSE3 instruction set not enabled" +#else + +#include <pmmintrin.h> + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_abs_pi8(__m64 a) +{ + return (__m64)__builtin_ia32_pabsb((__v8qi)a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_abs_epi8(__m128i a) +{ + return (__m128i)__builtin_ia32_pabsb128((__v16qi)a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_abs_pi16(__m64 a) +{ + return (__m64)__builtin_ia32_pabsw((__v4hi)a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_abs_epi16(__m128i a) +{ + return (__m128i)__builtin_ia32_pabsw128((__v8hi)a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_abs_pi32(__m64 a) +{ + return (__m64)__builtin_ia32_pabsd((__v2si)a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_abs_epi32(__m128i a) +{ + return (__m128i)__builtin_ia32_pabsd128((__v4si)a); +} + +#define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n))) +#define _mm_alignr_pi8(a, b, n) (__builtin_ia32_palignr((a), (b), (n))) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_epi32(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_pi32(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hadds_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hadds_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_epi32(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_pi32(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubs_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hsubs_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m128i 
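/* Annotation, not part of the imported header: _mm_maddubs_epi16 below wraps
 * pmaddubsw.  It multiplies the unsigned 8-bit elements of a by the
 * corresponding signed 8-bit elements of b, then adds adjacent pairs of the
 * 16-bit products with signed saturation.  A sketch of one output lane:
 *
 *     r[i] = sat_s16(a[2*i] * b[2*i] + a[2*i+1] * b[2*i+1])
 *
 * where a[] is read as unsigned char and b[] as signed char. */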
__attribute__((__always_inline__, __nodebug__)) +_mm_maddubs_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_maddubs_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mulhrs_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mulhrs_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shuffle_epi8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_shuffle_pi8(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sign_epi8(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sign_epi16(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sign_epi32(__m128i a, __m128i b) +{ + return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sign_pi8(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sign_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sign_pi32(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b); +} + +#endif /* __SSSE3__ */ + +#endif /* __TMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/varargs.h b/contrib/llvm/tools/clang/lib/Headers/varargs.h new file mode 100644 index 0000000..b5477d0 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/varargs.h @@ -0,0 +1,26 @@ +/*===---- varargs.h - Variable argument handling -------------------------------------=== +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +* +*===-----------------------------------------------------------------------=== +*/ +#ifndef __VARARGS_H +#define __VARARGS_H + #error "Please use <stdarg.h> instead of <varargs.h>" +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h new file mode 100644 index 0000000..6b2e468 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h @@ -0,0 +1,67 @@ +/*===---- wmmintrin.h - AES intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _WMMINTRIN_H +#define _WMMINTRIN_H + +#if !defined (__AES__) +# error "AES instructions not enabled" +#else + +#include <smmintrin.h> + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesenc_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesenc128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesenclast_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesenclast128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesdec_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesdec128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesdeclast_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesdeclast128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesimc_si128(__m128i __V) +{ + return (__m128i)__builtin_ia32_aesimc128(__V); +} + +#define _mm_aeskeygenassist_si128(C, R) \ + __builtin_ia32_aeskeygenassist128((C), (R)) + +#endif /* __AES__ */ +#endif /* _WMMINTRIN_H */ diff --git a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h new file mode 100644 index 0000000..3e82e28 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h @@ -0,0 +1,918 @@ +/*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __XMMINTRIN_H +#define __XMMINTRIN_H + +#ifndef __SSE__ +#error "SSE instruction set not enabled" +#else + +#include <mmintrin.h> + +typedef int __v4si __attribute__((__vector_size__(16))); +typedef float __v4sf __attribute__((__vector_size__(16))); +typedef float __m128 __attribute__((__vector_size__(16))); + +#include <mm_malloc.h> + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_add_ss(__m128 a, __m128 b) +{ + a[0] += b[0]; + return a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_add_ps(__m128 a, __m128 b) +{ + return a + b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_ss(__m128 a, __m128 b) +{ + a[0] -= b[0]; + return a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_ps(__m128 a, __m128 b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_mul_ss(__m128 a, __m128 b) +{ + a[0] *= b[0]; + return a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_mul_ps(__m128 a, __m128 b) +{ + return a * b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_div_ss(__m128 a, __m128 b) +{ + a[0] /= b[0]; + return a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_div_ps(__m128 a, __m128 b) +{ + return a / b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sqrt_ss(__m128 a) +{ + return __builtin_ia32_sqrtss(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sqrt_ps(__m128 a) +{ + return __builtin_ia32_sqrtps(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rcp_ss(__m128 a) +{ + return __builtin_ia32_rcpss(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rcp_ps(__m128 a) +{ + return __builtin_ia32_rcpps(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt_ss(__m128 a) +{ + return __builtin_ia32_rsqrtss(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt_ps(__m128 a) +{ + return __builtin_ia32_rsqrtps(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_min_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_minss(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_min_ps(__m128 a, __m128 b) +{ + return __builtin_ia32_minps(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_max_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_maxss(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_max_ps(__m128 a, __m128 b) +{ + return __builtin_ia32_maxps(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_and_ps(__m128 a, __m128 b) +{ + return (__m128)((__v4si)a & (__v4si)b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_ps(__m128 a, __m128 b) +{ + return (__m128)(~(__v4si)a & (__v4si)b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_or_ps(__m128 a, __m128 b) +{ + return (__m128)((__v4si)a | (__v4si)b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_xor_ps(__m128 a, __m128 b) +{ + 
return (__m128)((__v4si)a ^ (__v4si)b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 0); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 0); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(b, a, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(b, a, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(b, a, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(b, a, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 4); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 4); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(b, a, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(b, a, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(b, a, 6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(b, a, 6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 7); +} + +static __inline__ __m128 
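/* Annotation, not part of the imported header: the last argument to
 * __builtin_ia32_cmpss and __builtin_ia32_cmpps above is the CMPSS/CMPPS
 * predicate immediate:
 *
 *     0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq, 5 = nlt, 6 = nle, 7 = ord
 *
 * There is no direct gt/ge encoding, which is why _mm_cmpgt_ss, _mm_cmpge_ss
 * and friends swap their operands and use the lt/le predicates instead. */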
__attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 7); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_ss(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpss(a, b, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_ps(__m128 a, __m128 b) +{ + return (__m128)__builtin_ia32_cmpps(a, b, 3); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comieq_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_comieq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comilt_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_comilt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comile_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_comile(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comigt_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_comigt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comige_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_comige(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comineq_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_comineq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomieq_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_ucomieq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomilt_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_ucomilt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomile_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_ucomile(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomigt_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_ucomigt(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomige_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_ucomige(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomineq_ss(__m128 a, __m128 b) +{ + return __builtin_ia32_ucomineq(a, b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_si32(__m128 a) +{ + return __builtin_ia32_cvtss2si(a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_ss2si(__m128 a) +{ + return _mm_cvtss_si32(a); +} + +#ifdef __x86_64__ + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_si64(__m128 a) +{ + return __builtin_ia32_cvtss2si64(a); +} + +#endif + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_pi32(__m128 a) +{ + return (__m64)__builtin_ia32_cvtps2pi(a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvttss_si32(__m128 a) +{ + return a[0]; +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtt_ss2si(__m128 a) +{ + return _mm_cvttss_si32(a); +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvttss_si64(__m128 a) +{ + return a[0]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvttps_pi32(__m128 a) +{ + return (__m64)__builtin_ia32_cvttps2pi(a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_ss(__m128 a, 
int b) +{ + a[0] = b; + return a; +} + +#ifdef __x86_64__ + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_ss(__m128 a, long long b) +{ + a[0] = b; + return a; +} + +#endif + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi32_ps(__m128 a, __m64 b) +{ + return __builtin_ia32_cvtpi2ps(a, (__v2si)b); +} + +static __inline__ float __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_f32(__m128 a) +{ + return a[0]; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadh_pi(__m128 a, const __m64 *p) +{ + __m128 b; + b[0] = *(float*)p; + b[1] = *((float*)p+1); + return __builtin_shufflevector(a, b, 0, 1, 4, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadl_pi(__m128 a, const __m64 *p) +{ + __m128 b; + b[0] = *(float*)p; + b[1] = *((float*)p+1); + return __builtin_shufflevector(a, b, 4, 5, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_load_ss(const float *p) +{ + return (__m128){ *p, 0, 0, 0 }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_load1_ps(const float *p) +{ + return (__m128){ *p, *p, *p, *p }; +} + +#define _mm_load_ps1(p) _mm_load1_ps(p) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_load_ps(const float *p) +{ + return *(__m128*)p; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadu_ps(const float *p) +{ + return __builtin_ia32_loadups(p); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadr_ps(const float *p) +{ + __m128 a = _mm_load_ps(p); + return __builtin_shufflevector(a, a, 3, 2, 1, 0); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set_ss(float w) +{ + return (__m128){ w, 0, 0, 0 }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_ps(float w) +{ + return (__m128){ w, w, w, w }; +} + +// Microsoft specific. 
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set_ps1(float w) +{ + return _mm_set1_ps(w); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set_ps(float z, float y, float x, float w) +{ + return (__m128){ w, x, y, z }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_ps(float z, float y, float x, float w) +{ + return (__m128){ z, y, x, w }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_setzero_ps(void) +{ + return (__m128){ 0, 0, 0, 0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_storeh_pi(__m64 *p, __m128 a) +{ + __builtin_ia32_storehps((__v2si *)p, a); +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_storel_pi(__m64 *p, __m128 a) +{ + __builtin_ia32_storelps((__v2si *)p, a); +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_ss(float *p, __m128 a) +{ + *p = a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeu_ps(float *p, __m128 a) +{ + __builtin_ia32_storeups(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store1_ps(float *p, __m128 a) +{ + a = __builtin_shufflevector(a, a, 0, 0, 0, 0); + _mm_storeu_ps(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_ps(float *p, __m128 a) +{ + *(__m128 *)p = a; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storer_ps(float *p, __m128 a) +{ + a = __builtin_shufflevector(a, a, 3, 2, 1, 0); + _mm_store_ps(p, a); +} + +#define _MM_HINT_T0 1 +#define _MM_HINT_T1 2 +#define _MM_HINT_T2 3 +#define _MM_HINT_NTA 0 + +/* FIXME: We have to #define this because "sel" must be a constant integer, and + Sema doesn't do any form of constant propagation yet. 
*/ + +#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)a, 0, sel)) + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_pi(__m64 *p, __m64 a) +{ + __builtin_ia32_movntq(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_ps(float *p, __m128 a) +{ + __builtin_ia32_movntps(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_sfence(void) +{ + __builtin_ia32_sfence(); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_extract_pi16(__m64 a, int n) +{ + __v4hi b = (__v4hi)a; + return (unsigned short)b[n & 3]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_insert_pi16(__m64 a, int d, int n) +{ + __v4hi b = (__v4hi)a; + b[n & 3] = d; + return (__m64)b; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_max_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pmaxsw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_max_pu8(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pmaxub((__v8qi)a, (__v8qi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_min_pi16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pminsw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_min_pu8(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pminub((__v8qi)a, (__v8qi)b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_movemask_pi8(__m64 a) +{ + return __builtin_ia32_pmovmskb((__v8qi)a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_pu16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pmulhuw((__v4hi)a, (__v4hi)b); +} + +#define _mm_shuffle_pi16(a, n) \ + ((__m64)__builtin_shufflevector((__v4hi)(a), (__v4hi) {0}, \ + (n) & 0x3, ((n) & 0xc) >> 2, \ + ((n) & 0x30) >> 4, ((n) & 0xc0) >> 6)) + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_maskmove_si64(__m64 d, __m64 n, char *p) +{ + __builtin_ia32_maskmovq((__v8qi)d, (__v8qi)n, p); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_avg_pu8(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pavgb((__v8qi)a, (__v8qi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_avg_pu16(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_pavgw((__v4hi)a, (__v4hi)b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sad_pu8(__m64 a, __m64 b) +{ + return (__m64)__builtin_ia32_psadbw((__v8qi)a, (__v8qi)b); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_getcsr(void) +{ + return __builtin_ia32_stmxcsr(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_setcsr(unsigned int i) +{ + __builtin_ia32_ldmxcsr(i); +} + +#define _mm_shuffle_ps(a, b, mask) \ + (__builtin_shufflevector((__v4sf)a, (__v4sf)b, \ + (mask) & 0x3, ((mask) & 0xc) >> 2, \ + (((mask) & 0x30) >> 4) + 4, \ + (((mask) & 0xc0) >> 6) + 4)) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_ps(__m128 a, __m128 b) +{ + return __builtin_shufflevector(a, b, 2, 6, 3, 7); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_ps(__m128 a, __m128 b) +{ + return __builtin_shufflevector(a, b, 0, 4, 1, 5); +} + 
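/* Annotation, not part of the imported header: _mm_shuffle_ps above builds
 * each result element from a 2-bit field of its mask; the low two fields
 * select from a, the high two select from b.  A sketch using the _MM_SHUFFLE
 * helper defined later in this file:
 *
 *     __m128 x = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  // lanes 0..3
 *     __m128 y = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);  // lanes 4..7
 *     __m128 r = _mm_shuffle_ps(x, y, _MM_SHUFFLE(3, 2, 1, 0));
 *     // r = { x[0], x[1], y[2], y[3] } = { 0, 1, 6, 7 }
 */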
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_move_ss(__m128 a, __m128 b)
+{
+  return __builtin_shufflevector(a, b, 4, 1, 2, 3);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_movehl_ps(__m128 a, __m128 b)
+{
+  return __builtin_shufflevector(a, b, 6, 7, 2, 3);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_movelh_ps(__m128 a, __m128 b)
+{
+  return __builtin_shufflevector(a, b, 0, 1, 4, 5);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtpi16_ps(__m64 a)
+{
+  __m64 b, c;
+  __m128 r;
+
+  b = _mm_setzero_si64();
+  b = _mm_cmpgt_pi16(b, a);   /* all-ones where a is negative: the sign mask */
+  c = _mm_unpackhi_pi16(a, b);
+  r = _mm_setzero_ps();
+  r = _mm_cvtpi32_ps(r, c);
+  r = _mm_movelh_ps(r, r);
+  c = _mm_unpacklo_pi16(a, b);
+  r = _mm_cvtpi32_ps(r, c);
+
+  return r;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtpu16_ps(__m64 a)
+{
+  __m64 b, c;
+  __m128 r;
+
+  b = _mm_setzero_si64();
+  c = _mm_unpackhi_pi16(a, b);
+  r = _mm_setzero_ps();
+  r = _mm_cvtpi32_ps(r, c);
+  r = _mm_movelh_ps(r, r);
+  c = _mm_unpacklo_pi16(a, b);
+  r = _mm_cvtpi32_ps(r, c);
+
+  return r;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtpi8_ps(__m64 a)
+{
+  __m64 b;
+
+  b = _mm_setzero_si64();
+  b = _mm_cmpgt_pi8(b, a);
+  b = _mm_unpacklo_pi8(a, b);
+
+  return _mm_cvtpi16_ps(b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtpu8_ps(__m64 a)
+{
+  __m64 b;
+
+  b = _mm_setzero_si64();
+  b = _mm_unpacklo_pi8(a, b);
+
+  return _mm_cvtpi16_ps(b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtpi32x2_ps(__m64 a, __m64 b)
+{
+  __m128 c;
+
+  c = _mm_setzero_ps();
+  c = _mm_cvtpi32_ps(c, b);
+  c = _mm_movelh_ps(c, c);
+
+  return _mm_cvtpi32_ps(c, a);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtps_pi16(__m128 a)
+{
+  __m64 b, c;
+
+  b = _mm_cvtps_pi32(a);
+  a = _mm_movehl_ps(a, a);
+  c = _mm_cvtps_pi32(a);
+
+  /* The two pairs of 32-bit results must be packed with signed saturation
+     into four 16-bit lanes, which is _mm_packs_pi32, not _mm_packs_pi16. */
+  return _mm_packs_pi32(b, c);
+}
+
+static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+_mm_cvtps_pi8(__m128 a)
+{
+  __m64 b, c;
+
+  b = _mm_cvtps_pi16(a);
+  c = _mm_setzero_si64();
+
+  return _mm_packs_pi16(b, c);
+}
+
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_mm_movemask_ps(__m128 a)
+{
+  return __builtin_ia32_movmskps(a);
+}
+
+#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
+
+#define _MM_EXCEPT_INVALID (0x0001)
+#define _MM_EXCEPT_DENORM (0x0002)
+#define _MM_EXCEPT_DIV_ZERO (0x0004)
+#define _MM_EXCEPT_OVERFLOW (0x0008)
+#define _MM_EXCEPT_UNDERFLOW (0x0010)
+#define _MM_EXCEPT_INEXACT (0x0020)
+#define _MM_EXCEPT_MASK (0x003f)
+
+#define _MM_MASK_INVALID (0x0080)
+#define _MM_MASK_DENORM (0x0100)
+#define _MM_MASK_DIV_ZERO (0x0200)
+#define _MM_MASK_OVERFLOW (0x0400)
+#define _MM_MASK_UNDERFLOW (0x0800)
+#define _MM_MASK_INEXACT (0x1000)
+#define _MM_MASK_MASK (0x1f80)
+
+#define _MM_ROUND_NEAREST (0x0000)
+#define _MM_ROUND_DOWN (0x2000)
+#define _MM_ROUND_UP (0x4000)
+#define _MM_ROUND_TOWARD_ZERO (0x6000)
+#define _MM_ROUND_MASK (0x6000)
+
+/* _MM_FLUSH_ZERO_OFF must be zero, so that OR-ing it into the cleared
+   flush-to-zero field of MXCSR actually disables the mode. */
+#define _MM_FLUSH_ZERO_MASK (0x8000)
+#define _MM_FLUSH_ZERO_ON (0x8000)
+#define _MM_FLUSH_ZERO_OFF (0x0000)
+
+#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
+#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
+#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
+#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
+
+#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
+#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
+#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
+#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))
+
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+do { \
+  __m128 tmp3, tmp2, tmp1, tmp0; \
+  tmp0 = _mm_unpacklo_ps((row0), (row1)); \
+  tmp2 = _mm_unpacklo_ps((row2), (row3)); \
+  tmp1 = _mm_unpackhi_ps((row0), (row1)); \
+  tmp3 = _mm_unpackhi_ps((row2), (row3)); \
+  (row0) = _mm_movelh_ps(tmp0, tmp2); \
+  (row1) = _mm_movehl_ps(tmp2, tmp0); \
+  (row2) = _mm_movelh_ps(tmp1, tmp3); \
+  (row3) = _mm_movehl_ps(tmp3, tmp1); \
+} while (0)
+
+/* Ugly hack for backwards compatibility with gcc, which chains the SSE2
+   header onto this one whenever SSE2 is enabled. */
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#endif /* __SSE__ */
+
+#endif /* __XMMINTRIN_H */
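+
+/* Editorial note: an illustrative sketch, not part of the original header.
+ * It shows the intended use of _MM_TRANSPOSE4_PS() above: the macro
+ * rewrites its four row arguments in place, so plain lvalues must be
+ * passed rather than expressions. The function name and the row-major
+ * float[16] layout are assumptions made for this example only.
+ */
+#if 0 /* example only */
+static void example_transpose_4x4(float *m)
+{
+  __m128 row0 = _mm_loadu_ps(m +  0);
+  __m128 row1 = _mm_loadu_ps(m +  4);
+  __m128 row2 = _mm_loadu_ps(m +  8);
+  __m128 row3 = _mm_loadu_ps(m + 12);
+
+  _MM_TRANSPOSE4_PS(row0, row1, row2, row3);
+
+  _mm_storeu_ps(m +  0, row0);
+  _mm_storeu_ps(m +  4, row1);
+  _mm_storeu_ps(m +  8, row2);
+  _mm_storeu_ps(m + 12, row3);
+}
+#endif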