60 files changed, 950 insertions, 171 deletions
diff --git a/contrib/compiler-rt/lib/builtins/README.txt b/contrib/compiler-rt/lib/builtins/README.txt
index 1c08e74..ad36e4e 100644
--- a/contrib/compiler-rt/lib/builtins/README.txt
+++ b/contrib/compiler-rt/lib/builtins/README.txt
@@ -220,7 +220,9 @@ _Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions,
 // for use with some implementations of assert() in <assert.h>
 void __eprintf(const char* format, const char* assertion_expression,
 				const char* line, const char* file);
-				
+
+// for systems with emulated thread local storage
+void* __emutls_get_address(struct __emutls_control*);
 
 
 //   Power PC specific functions
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S
new file mode 100644
index 0000000..036a6f5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S
@@ -0,0 +1,96 @@
+//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cdcmpeq(double a, double b) {
+//   if (isnan(a) || isnan(b)) {
+//     Z = 0; C = 1;
+//   } else {
+//     __aeabi_cdcmple(a, b);
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+        push {r0-r3, lr}
+        bl __aeabi_cdcmpeq_check_nan
+        cmp r0, #1
+        pop {r0-r3, lr}
+
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        bne __aeabi_cdcmple
+
+        msr CPSR_f, #APSR_C
+        JMP(lr)
+END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+
+
+// void __aeabi_cdcmple(double a, double b) {
+//   if (__aeabi_dcmplt(a, b)) {
+//     Z = 0; C = 0;
+//   } else if (__aeabi_dcmpeq(a, b)) {
+//     Z = 1; C = 1;
+//   } else {
+//     Z = 0; C = 1;
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+        // Per the RTABI, this function must preserve r0-r11.
+        // Save lr in the same instruction for compactness
+        push {r0-r3, lr}
+
+        bl __aeabi_dcmplt
+        cmp r0, #1
+        moveq ip, #0
+        beq 1f
+
+        ldm sp, {r0-r3}
+        bl __aeabi_dcmpeq
+        cmp r0, #1
+        moveq ip, #(APSR_C | APSR_Z)
+        movne ip, #(APSR_C)
+
+1:
+        msr CPSR_f, ip
+        pop {r0-r3}
+        POP_PC()
+END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+
+// int __aeabi_cdrcmple(double a, double b) {
+//   return __aeabi_cdcmple(b, a);
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+        // Swap r0 and r2
+        mov ip, r0
+        mov r0, r2
+        mov r2, ip
+
+        // Swap r1 and r3
+        mov ip, r1
+        mov r1, r3
+        mov r3, ip
+
+        b __aeabi_cdcmple
+END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
new file mode 100644
index 0000000..577f6b2c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+__attribute__((pcs("aapcs")))
+__attribute__((visibility("hidden")))
+int __aeabi_cdcmpeq_check_nan(double a, double b) {
+    return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S
new file mode 100644
index 0000000..43594e5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S
@@ -0,0 +1,91 @@
+//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cfcmpeq(float a, float b) {
+//   if (isnan(a) || isnan(b)) {
+//     Z = 0; C = 1;
+//   } else {
+//     __aeabi_cfcmple(a, b);
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+        push {r0-r3, lr}
+        bl __aeabi_cfcmpeq_check_nan
+        cmp r0, #1
+        pop {r0-r3, lr}
+
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        bne __aeabi_cfcmple
+
+        msr CPSR_f, #APSR_C
+        JMP(lr)
+END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+
+
+// void __aeabi_cfcmple(float a, float b) {
+//   if (__aeabi_fcmplt(a, b)) {
+//     Z = 0; C = 0;
+//   } else if (__aeabi_fcmpeq(a, b)) {
+//     Z = 1; C = 1;
+//   } else {
+//     Z = 0; C = 1;
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+        // Per the RTABI, this function must preserve r0-r11.
+        // Save lr in the same instruction for compactness
+        push {r0-r3, lr}
+
+        bl __aeabi_fcmplt
+        cmp r0, #1
+        moveq ip, #0
+        beq 1f
+
+        ldm sp, {r0-r3}
+        bl __aeabi_fcmpeq
+        cmp r0, #1
+        moveq ip, #(APSR_C | APSR_Z)
+        movne ip, #(APSR_C)
+
+1:
+        msr CPSR_f, ip
+        pop {r0-r3}
+        POP_PC()
+END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+
+// int __aeabi_cfrcmple(float a, float b) {
+//   return __aeabi_cfcmple(b, a);
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+        // Swap r0 and r1
+        mov ip, r0
+        mov r0, r1
+        mov r1, ip
+
+        b __aeabi_cfcmple
+END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
new file mode 100644
index 0000000..992e31f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+__attribute__((pcs("aapcs")))
+__attribute__((visibility("hidden")))
+int __aeabi_cfcmpeq_check_nan(float a, float b) {
+    return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c
new file mode 100644
index 0000000..fc17d5a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "../fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__aeabi_dsub(fp_t, fp_t);
+
+COMPILER_RT_ABI fp_t
+__aeabi_drsub(fp_t a, fp_t b) {
+    return __aeabi_dsub(b, a);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c
new file mode 100644
index 0000000..64258dc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "../fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__aeabi_fsub(fp_t, fp_t);
+
+COMPILER_RT_ABI fp_t
+__aeabi_frsub(fp_t a, fp_t b) {
+    return __aeabi_fsub(b, a);
+}
diff --git a/contrib/compiler-rt/lib/builtins/assembly.h b/contrib/compiler-rt/lib/builtins/assembly.h
index 8bb0ddc..c289705 100644
--- a/contrib/compiler-rt/lib/builtins/assembly.h
+++ b/contrib/compiler-rt/lib/builtins/assembly.h
@@ -73,6 +73,15 @@
 #define JMPc(r, c) mov##c pc, r
 #endif
 
+// pop {pc} can't switch Thumb mode on ARMv4T
+#if __ARM_ARCH >= 5
+#define POP_PC() pop {pc}
+#else
+#define POP_PC()                                                               \
+  pop {ip};                                                                    \
+  JMP(ip)
+#endif
+
 #if __ARM_ARCH_ISA_THUMB == 2
 #define IT(cond)  it cond
 #define ITT(cond) itt cond
diff --git a/contrib/compiler-rt/lib/builtins/atomic.c b/contrib/compiler-rt/lib/builtins/atomic.c
index 35c8837dc..f1ddc3e 100644
--- a/contrib/compiler-rt/lib/builtins/atomic.c
+++ b/contrib/compiler-rt/lib/builtins/atomic.c
@@ -56,13 +56,13 @@ static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
 #include <machine/atomic.h>
 #include <sys/umtx.h>
 typedef struct _usem Lock;
-inline static void unlock(Lock *l) {
+__inline static void unlock(Lock *l) {
   __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE);
   __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
   if (l->_has_waiters)
       _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
 }
-inline static void lock(Lock *l) {
+__inline static void lock(Lock *l) {
   uint32_t old = 1;
   while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old,
         0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
@@ -76,12 +76,12 @@ static Lock locks[SPINLOCK_COUNT] = { [0 ...  SPINLOCK_COUNT-1] = {0,1,0} };
 #elif defined(__APPLE__)
 #include <libkern/OSAtomic.h>
 typedef OSSpinLock Lock;
-inline static void unlock(Lock *l) {
+__inline static void unlock(Lock *l) {
   OSSpinLockUnlock(l);
 }
 /// Locks a lock.  In the current implementation, this is potentially
 /// unbounded in the contended case.
-inline static void lock(Lock *l) {  
+__inline static void lock(Lock *l) {
   OSSpinLockLock(l);
 }
 static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
@@ -89,12 +89,12 @@ static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
 #else
 typedef _Atomic(uintptr_t) Lock;
 /// Unlock a lock.  This is a release operation.
-inline static void unlock(Lock *l) {
+__inline static void unlock(Lock *l) {
   __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
 }
 /// Locks a lock.  In the current implementation, this is potentially
 /// unbounded in the contended case.
-inline static void lock(Lock *l) {
+__inline static void lock(Lock *l) {
   uintptr_t old = 0;
   while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
         __ATOMIC_RELAXED))
@@ -106,7 +106,7 @@ static Lock locks[SPINLOCK_COUNT];
 
 
 /// Returns a lock to use for a given pointer.  
-static inline Lock *lock_for_pointer(void *ptr) {
+static __inline Lock *lock_for_pointer(void *ptr) {
   intptr_t hash = (intptr_t)ptr;
   // Disregard the lowest 4 bits.  We want all values that may be part of the
   // same memory operation to hash to the same value and therefore use the same
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c b/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c
index 15984cd..da912af 100644
--- a/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_clear
 void atomic_flag_clear(volatile atomic_flag *object) {
-  return __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
+  __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
 }
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c b/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c
index 0f7859c..1059b78 100644
--- a/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c
@@ -12,9 +12,17 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_clear_explicit
 void atomic_flag_clear_explicit(volatile atomic_flag *object,
                                 memory_order order) {
-  return __c11_atomic_store(&(object)->_Value, 0, order);
+  __c11_atomic_store(&(object)->_Value, 0, order);
 }
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c
index 07209fc..e8811d3 100644
--- a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_test_and_set
 _Bool atomic_flag_test_and_set(volatile atomic_flag *object) {
   return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST);
 }
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c
index eaa5be0..5c8c2df 100644
--- a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c
@@ -12,9 +12,17 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_test_and_set_explicit
 _Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
                                         memory_order order) {
   return __c11_atomic_exchange(&(object)->_Value, 1, order);
 }
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c b/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c
index ad292d2..9ccc2ae 100644
--- a/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c
+++ b/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_signal_fence
 void atomic_signal_fence(memory_order order) {
   __c11_atomic_signal_fence(order);
 }
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c b/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c
index 71f698c..d225601 100644
--- a/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c
+++ b/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_thread_fence
 void atomic_thread_fence(memory_order order) {
   __c11_atomic_thread_fence(order);
 }
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/comparedf2.c b/contrib/compiler-rt/lib/builtins/comparedf2.c
index 64eea12..9e29752 100644
--- a/contrib/compiler-rt/lib/builtins/comparedf2.c
+++ b/contrib/compiler-rt/lib/builtins/comparedf2.c
@@ -80,6 +80,11 @@ __ledf2(fp_t a, fp_t b) {
     }
 }
 
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpdf2, __ledf2);
+#endif
+
 enum GE_RESULT {
     GE_LESS      = -1,
     GE_EQUAL     =  0,
diff --git a/contrib/compiler-rt/lib/builtins/comparesf2.c b/contrib/compiler-rt/lib/builtins/comparesf2.c
index 442289c..1fd5063 100644
--- a/contrib/compiler-rt/lib/builtins/comparesf2.c
+++ b/contrib/compiler-rt/lib/builtins/comparesf2.c
@@ -80,6 +80,11 @@ __lesf2(fp_t a, fp_t b) {
     }
 }
 
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpsf2, __lesf2);
+#endif
+
 enum GE_RESULT {
     GE_LESS      = -1,
     GE_EQUAL     =  0,
diff --git a/contrib/compiler-rt/lib/builtins/comparetf2.c b/contrib/compiler-rt/lib/builtins/comparetf2.c
index a6436de..c0ad8ed 100644
--- a/contrib/compiler-rt/lib/builtins/comparetf2.c
+++ b/contrib/compiler-rt/lib/builtins/comparetf2.c
@@ -79,6 +79,11 @@ COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) {
     }
 }
 
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmptf2, __letf2);
+#endif
+
 enum GE_RESULT {
     GE_LESS      = -1,
     GE_EQUAL     =  0,
diff --git a/contrib/compiler-rt/lib/builtins/divdc3.c b/contrib/compiler-rt/lib/builtins/divdc3.c
index 7de78c8..3c88390 100644
--- a/contrib/compiler-rt/lib/builtins/divdc3.c
+++ b/contrib/compiler-rt/lib/builtins/divdc3.c
@@ -17,7 +17,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI double _Complex
+COMPILER_RT_ABI Dcomplex
 __divdc3(double __a, double __b, double __c, double __d)
 {
     int __ilogbw = 0;
@@ -29,31 +29,31 @@ __divdc3(double __a, double __b, double __c, double __d)
         __d = crt_scalbn(__d, -__ilogbw);
     }
     double __denom = __c * __c + __d * __d;
-    double _Complex z;
-    __real__ z = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Dcomplex z;
+    COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysign(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysign(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
             __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0.0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
             __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
-            __real__ z = 0.0 * (__a * __c + __b * __d);
-            __imag__ z = 0.0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/contrib/compiler-rt/lib/builtins/divsc3.c b/contrib/compiler-rt/lib/builtins/divsc3.c
index 710d532..42a4831 100644
--- a/contrib/compiler-rt/lib/builtins/divsc3.c
+++ b/contrib/compiler-rt/lib/builtins/divsc3.c
@@ -17,7 +17,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI float _Complex
+COMPILER_RT_ABI Fcomplex
 __divsc3(float __a, float __b, float __c, float __d)
 {
     int __ilogbw = 0;
@@ -29,31 +29,31 @@ __divsc3(float __a, float __b, float __c, float __d)
         __d = crt_scalbnf(__d, -__ilogbw);
     }
     float __denom = __c * __c + __d * __d;
-    float _Complex z;
-    __real__ z = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Fcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysignf(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignf(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
             __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
             __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
-            __real__ z = 0 * (__a * __c + __b * __d);
-            __imag__ z = 0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/contrib/compiler-rt/lib/builtins/divtc3.c b/contrib/compiler-rt/lib/builtins/divtc3.c
new file mode 100644
index 0000000..04693df
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divtc3.c
@@ -0,0 +1,60 @@
+/*===-- divtc3.c - Implement __divtc3 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divtc3 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI long double _Complex
+__divtc3(long double __a, long double __b, long double __c, long double __d)
+{
+    int __ilogbw = 0;
+    long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+    if (crt_isfinite(__logbw))
+    {
+        __ilogbw = (int)__logbw;
+        __c = crt_scalbnl(__c, -__ilogbw);
+        __d = crt_scalbnl(__d, -__ilogbw);
+    }
+    long double __denom = __c * __c + __d * __d;
+    long double _Complex z;
+    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    {
+        if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+        {
+            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
+            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+        }
+        else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+                 crt_isfinite(__c) && crt_isfinite(__d))
+        {
+            __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
+            __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
+            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
+            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+        }
+        else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+                 crt_isfinite(__a) && crt_isfinite(__b))
+        {
+            __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
+            __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
+            __real__ z = 0.0 * (__a * __c + __b * __d);
+            __imag__ z = 0.0 * (__b * __c - __a * __d);
+        }
+    }
+    return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/divxc3.c b/contrib/compiler-rt/lib/builtins/divxc3.c
index 175ae3c..6f49280 100644
--- a/contrib/compiler-rt/lib/builtins/divxc3.c
+++ b/contrib/compiler-rt/lib/builtins/divxc3.c
@@ -18,7 +18,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
 __divxc3(long double __a, long double __b, long double __c, long double __d)
 {
     int __ilogbw = 0;
@@ -30,31 +30,31 @@ __divxc3(long double __a, long double __b, long double __c, long double __d)
         __d = crt_scalbnl(__d, -__ilogbw);
     }
     long double __denom = __c * __c + __d * __d;
-    long double _Complex z;
-    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
             __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
             __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
-            __real__ z = 0 * (__a * __c + __b * __d);
-            __imag__ z = 0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/contrib/compiler-rt/lib/builtins/emutls.c b/contrib/compiler-rt/lib/builtins/emutls.c
new file mode 100644
index 0000000..09e7956
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/emutls.c
@@ -0,0 +1,183 @@
+/* ===---------- emutls.c - Implements __emutls_get_address ---------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "int_lib.h"
+#include "int_util.h"
+
+/* Default is not to use posix_memalign, so systems like Android
+ * can use thread local data without heavier POSIX memory allocators.
+ */
+#ifndef EMUTLS_USE_POSIX_MEMALIGN
+#define EMUTLS_USE_POSIX_MEMALIGN 0
+#endif
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+    size_t size;  /* size of the object in bytes */
+    size_t align;  /* alignment of the object in bytes */
+    union {
+        uintptr_t index;  /* data[index-1] is the object address */
+        void* address;  /* object address, when in single thread env */
+    } object;
+    void* value;  /* null or non-zero initial value for the object */
+} __emutls_control;
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+    void *base;
+#if EMUTLS_USE_POSIX_MEMALIGN
+    if (posix_memalign(&base, align, size) != 0)
+        abort();
+#else
+    #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
+    char* object;
+    if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+        abort();
+    base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
+                    & ~(uintptr_t)(align - 1));
+
+    ((void**)base)[-1] = object;
+#endif
+    return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+#if EMUTLS_USE_POSIX_MEMALIGN
+    free(base);
+#else
+    /* The mallocated address is in ((void**)base)[-1] */
+    free(((void**)base)[-1]);
+#endif
+}
+
+/* Emulated TLS objects are always allocated at run-time. */
+static __inline void *emutls_allocate_object(__emutls_control *control) {
+    /* Use standard C types, check with gcc's emutls.o. */
+    typedef unsigned int gcc_word __attribute__((mode(word)));
+    typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+    COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word));
+    COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
+    COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
+
+    size_t size = control->size;
+    size_t align = control->align;
+    if (align < sizeof(void*))
+        align = sizeof(void*);
+    /* Make sure that align is power of 2. */
+    if ((align & (align - 1)) != 0)
+        abort();
+
+    void* base = emutls_memalign_alloc(align, size);
+    if (control->value)
+        memcpy(base, control->value, size);
+    else
+        memset(base, 0, size);
+    return base;
+}
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
+
+typedef struct emutls_address_array {
+    uintptr_t size;  /* number of elements in the 'data' array */
+    void* data[];
+} emutls_address_array;
+
+static pthread_key_t emutls_pthread_key;
+
+static void emutls_key_destructor(void* ptr) {
+    emutls_address_array* array = (emutls_address_array*)ptr;
+    uintptr_t i;
+    for (i = 0; i < array->size; ++i) {
+        if (array->data[i])
+            emutls_memalign_free(array->data[i]);
+    }
+    free(ptr);
+}
+
+static void emutls_init(void) {
+    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+        abort();
+}
+
+/* Returns control->object.index; set index if not allocated yet. */
+static __inline uintptr_t emutls_get_index(__emutls_control *control) {
+    uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
+    if (!index) {
+        static pthread_once_t once = PTHREAD_ONCE_INIT;
+        pthread_once(&once, emutls_init);
+        pthread_mutex_lock(&emutls_mutex);
+        index = control->object.index;
+        if (!index) {
+            index = ++emutls_num_object;
+            __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
+        }
+        pthread_mutex_unlock(&emutls_mutex);
+    }
+    return index;
+}
+
+/* Updates newly allocated thread local emutls_address_array. */
+static __inline void emutls_check_array_set_size(emutls_address_array *array,
+                                                 uintptr_t size) {
+    if (array == NULL)
+        abort();
+    array->size = size;
+    pthread_setspecific(emutls_pthread_key, (void*)array);
+}
+
+/* Returns the new 'data' array size, number of elements,
+ * which must be no smaller than the given index.
+ */
+static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
+   /* Need to allocate emutls_address_array with one extra slot
+    * to store the data array size.
+    * Round up the emutls_address_array size to multiple of 16.
+    */
+    return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
+}
+
+/* Returns the thread local emutls_address_array.
+ * Extends its size if necessary to hold address at index.
+ */
+static __inline emutls_address_array *
+emutls_get_address_array(uintptr_t index) {
+    emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+    if (array == NULL) {
+        uintptr_t new_size = emutls_new_data_array_size(index);
+        array = calloc(new_size + 1, sizeof(void*));
+        emutls_check_array_set_size(array, new_size);
+    } else if (index > array->size) {
+        uintptr_t orig_size = array->size;
+        uintptr_t new_size = emutls_new_data_array_size(index);
+        array = realloc(array, (new_size + 1) * sizeof(void*));
+        if (array)
+            memset(array->data + orig_size, 0,
+                   (new_size - orig_size) * sizeof(void*));
+        emutls_check_array_set_size(array, new_size);
+    }
+    return array;
+}
+
+void* __emutls_get_address(__emutls_control* control) {
+    uintptr_t index = emutls_get_index(control);
+    emutls_address_array* array = emutls_get_address_array(index);
+    if (array->data[index - 1] == NULL)
+        array->data[index - 1] = emutls_allocate_object(control);
+    return array->data[index - 1];
+}
diff --git a/contrib/compiler-rt/lib/builtins/enable_execute_stack.c b/contrib/compiler-rt/lib/builtins/enable_execute_stack.c
index 23e4940..0dc3482 100644
--- a/contrib/compiler-rt/lib/builtins/enable_execute_stack.c
+++ b/contrib/compiler-rt/lib/builtins/enable_execute_stack.c
@@ -21,8 +21,8 @@
 #define HAVE_SYSCONF 1
 
 #ifdef _WIN32
-#include <windef.h>
-#include <winbase.h>
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
 #else
 #ifndef __APPLE__
 #include <unistd.h>
diff --git a/contrib/compiler-rt/lib/builtins/extendhfsf2.c b/contrib/compiler-rt/lib/builtins/extendhfsf2.c
index 7524e2e..27115a4 100644
--- a/contrib/compiler-rt/lib/builtins/extendhfsf2.c
+++ b/contrib/compiler-rt/lib/builtins/extendhfsf2.c
@@ -12,9 +12,11 @@
 #define DST_SINGLE
 #include "fp_extend_impl.inc"
 
+ARM_EABI_FNALIAS(h2f, extendhfsf2)
+
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI __attribute__((noinline)) float __extendhfsf2(uint16_t a) {
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
     return __extendXfYf2__(a);
 }
 
diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfdi.c b/contrib/compiler-rt/lib/builtins/fixunsdfdi.c
index 2e0d87e..4b0bc9e1 100644
--- a/contrib/compiler-rt/lib/builtins/fixunsdfdi.c
+++ b/contrib/compiler-rt/lib/builtins/fixunsdfdi.c
@@ -22,8 +22,8 @@ COMPILER_RT_ABI du_int
 __fixunsdfdi(double a)
 {
     if (a <= 0.0) return 0;
-    su_int high = a/0x1p32f;
-    su_int low = a - (double)high*0x1p32f;
+    su_int high = a / 4294967296.f;               /* a / 0x1p32f; */
+    su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */
     return ((du_int)high << 32) | low;
 }
 
diff --git a/contrib/compiler-rt/lib/builtins/fixunssfdi.c b/contrib/compiler-rt/lib/builtins/fixunssfdi.c
index 5a154e8..f8ebab8 100644
--- a/contrib/compiler-rt/lib/builtins/fixunssfdi.c
+++ b/contrib/compiler-rt/lib/builtins/fixunssfdi.c
@@ -23,8 +23,8 @@ __fixunssfdi(float a)
 {
     if (a <= 0.0f) return 0;
     double da = a;
-    su_int high = da/0x1p32f;
-    su_int low = da - (double)high*0x1p32f;
+    su_int high = da / 4294967296.f;               /* da / 0x1p32f; */
+    su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */
     return ((du_int)high << 32) | low;
 }
 
diff --git a/contrib/compiler-rt/lib/builtins/floatdidf.c b/contrib/compiler-rt/lib/builtins/floatdidf.c
index e53fa25..a300c9f 100644
--- a/contrib/compiler-rt/lib/builtins/floatdidf.c
+++ b/contrib/compiler-rt/lib/builtins/floatdidf.c
@@ -32,8 +32,8 @@ ARM_EABI_FNALIAS(l2d, floatdidf)
 COMPILER_RT_ABI double
 __floatdidf(di_int a)
 {
-	static const double twop52 = 0x1.0p52;
-	static const double twop32 = 0x1.0p32;
+	static const double twop52 = 4503599627370496.0; // 0x1.0p52
+	static const double twop32 = 4294967296.0; // 0x1.0p32
 	
 	union { int64_t x; double d; } low = { .d = twop52 };
 	
diff --git a/contrib/compiler-rt/lib/builtins/floatditf.c b/contrib/compiler-rt/lib/builtins/floatditf.c
index 1a5f8e5..cd51dd8 100644
--- a/contrib/compiler-rt/lib/builtins/floatditf.c
+++ b/contrib/compiler-rt/lib/builtins/floatditf.c
@@ -27,19 +27,17 @@ COMPILER_RT_ABI fp_t __floatditf(di_int a) {
 
     // All other cases begin by extracting the sign and absolute value of a
     rep_t sign = 0;
-    unsigned aAbs = (unsigned)a;
+    du_int aAbs = (du_int)a;
     if (a < 0) {
         sign = signBit;
-        aAbs += 0x80000000;
+        aAbs = ~(du_int)a + 1U;
     }
 
     // Exponent of (fp_t)a is the width of abs(a).
-    const int exponent = (aWidth - 1) - __builtin_clzll(a);
+    const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
     rep_t result;
 
-    // Shift a into the significand field and clear the implicit bit.  Extra
-    // cast to unsigned int is necessary to get the correct behavior for
-    // the input INT_MIN.
+    // Shift a into the significand field, rounding if it is a right-shift
     const int shift = significandBits - exponent;
     result = (rep_t)aAbs << shift ^ implicitBit;
 
diff --git a/contrib/compiler-rt/lib/builtins/floatsitf.c b/contrib/compiler-rt/lib/builtins/floatsitf.c
index 8534693..f0abca3 100644
--- a/contrib/compiler-rt/lib/builtins/floatsitf.c
+++ b/contrib/compiler-rt/lib/builtins/floatsitf.c
@@ -30,16 +30,14 @@ COMPILER_RT_ABI fp_t __floatsitf(int a) {
     unsigned aAbs = (unsigned)a;
     if (a < 0) {
         sign = signBit;
-        aAbs += 0x80000000;
+        aAbs = ~(unsigned)a + 1U;
     }
 
     // Exponent of (fp_t)a is the width of abs(a).
-    const int exponent = (aWidth - 1) - __builtin_clz(a);
+    const int exponent = (aWidth - 1) - __builtin_clz(aAbs);
     rep_t result;
 
-    // Shift a into the significand field and clear the implicit bit.  Extra
-    // cast to unsigned int is necessary to get the correct behavior for
-    // the input INT_MIN.
+    // Shift a into the significand field and clear the implicit bit.
     const int shift = significandBits - exponent;
     result = (rep_t)aAbs << shift ^ implicitBit;
 
diff --git a/contrib/compiler-rt/lib/builtins/floatundidf.c b/contrib/compiler-rt/lib/builtins/floatundidf.c
index 73b8bac..67aa86e 100644
--- a/contrib/compiler-rt/lib/builtins/floatundidf.c
+++ b/contrib/compiler-rt/lib/builtins/floatundidf.c
@@ -32,9 +32,9 @@ ARM_EABI_FNALIAS(ul2d, floatundidf)
 COMPILER_RT_ABI double
 __floatundidf(du_int a)
 {
-	static const double twop52 = 0x1.0p52;
-	static const double twop84 = 0x1.0p84;
-	static const double twop84_plus_twop52 = 0x1.00000001p84;
+	static const double twop52 = 4503599627370496.0; // 0x1.0p52
+	static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
+	static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
 	
 	union { uint64_t x; double d; } high = { .d = twop84 };
 	union { uint64_t x; double d; } low = { .d = twop52 };
diff --git a/contrib/compiler-rt/lib/builtins/fp_add_impl.inc b/contrib/compiler-rt/lib/builtins/fp_add_impl.inc
index 5741889..b47be1b 100644
--- a/contrib/compiler-rt/lib/builtins/fp_add_impl.inc
+++ b/contrib/compiler-rt/lib/builtins/fp_add_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fp_t __addXf3__(fp_t a, fp_t b) {
+static __inline fp_t __addXf3__(fp_t a, fp_t b) {
     rep_t aRep = toRep(a);
     rep_t bRep = toRep(b);
     const rep_t aAbs = aRep & absMask;
diff --git a/contrib/compiler-rt/lib/builtins/fp_extend.h b/contrib/compiler-rt/lib/builtins/fp_extend.h
index 5c2b923..6d95a06 100644
--- a/contrib/compiler-rt/lib/builtins/fp_extend.h
+++ b/contrib/compiler-rt/lib/builtins/fp_extend.h
@@ -28,7 +28,7 @@ typedef double src_t;
 typedef uint64_t src_rep_t;
 #define SRC_REP_C UINT64_C
 static const int srcSigBits = 52;
-static inline int src_rep_t_clz(src_rep_t a) {
+static __inline int src_rep_t_clz(src_rep_t a) {
 #if defined __LP64__
     return __builtin_clzl(a);
 #else
@@ -75,12 +75,12 @@ static const int dstSigBits = 112;
 // End of specialization parameters.  Two helper routines for conversion to and
 // from the representation of floating-point data as integer values follow.
 
-static inline src_rep_t srcToRep(src_t x) {
+static __inline src_rep_t srcToRep(src_t x) {
     const union { src_t f; src_rep_t i; } rep = {.f = x};
     return rep.i;
 }
 
-static inline dst_t dstFromRep(dst_rep_t x) {
+static __inline dst_t dstFromRep(dst_rep_t x) {
     const union { dst_t f; dst_rep_t i; } rep = {.i = x};
     return rep.f;
 }
diff --git a/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc b/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc
index edcfa8d..b785cc7 100644
--- a/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc
+++ b/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc
@@ -38,7 +38,7 @@
 
 #include "fp_extend.h"
 
-static inline dst_t __extendXfYf2__(src_t a) {
+static __inline dst_t __extendXfYf2__(src_t a) {
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
     const int srcBits = sizeof(src_t)*CHAR_BIT;
diff --git a/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc b/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc
index 035e87c..da70d4d 100644
--- a/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc
+++ b/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fixint_t __fixint(fp_t a) {
+static __inline fixint_t __fixint(fp_t a) {
     const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
     const fixint_t fixint_min = -fixint_max - 1;
     // Break a into sign, exponent, significand
diff --git a/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc b/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc
index 5fefab0..d68ccf2 100644
--- a/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc
+++ b/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fixuint_t __fixuint(fp_t a) {
+static __inline fixuint_t __fixuint(fp_t a) {
     // Break a into sign, exponent, significand
     const rep_t aRep = toRep(a);
     const rep_t aAbs = aRep & absMask;
@@ -27,7 +27,7 @@ static inline fixuint_t __fixuint(fp_t a) {
         return 0;
 
     // If the value is too large for the integer type, saturate.
-    if ((unsigned)exponent > sizeof(fixuint_t) * CHAR_BIT)
+    if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT)
         return ~(fixuint_t)0;
 
     // If 0 <= exponent < significandBits, right shift to get the result.
diff --git a/contrib/compiler-rt/lib/builtins/fp_lib.h b/contrib/compiler-rt/lib/builtins/fp_lib.h
index faebb99..223fb98 100644
--- a/contrib/compiler-rt/lib/builtins/fp_lib.h
+++ b/contrib/compiler-rt/lib/builtins/fp_lib.h
@@ -46,12 +46,12 @@ typedef float fp_t;
 #define REP_C UINT32_C
 #define significandBits 23
 
-static inline int rep_clz(rep_t a) {
+static __inline int rep_clz(rep_t a) {
     return __builtin_clz(a);
 }
 
 // 32x32 --> 64 bit multiply
-static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
     const uint64_t product = (uint64_t)a*b;
     *hi = product >> 32;
     *lo = product;
@@ -66,7 +66,7 @@ typedef double fp_t;
 #define REP_C UINT64_C
 #define significandBits 52
 
-static inline int rep_clz(rep_t a) {
+static __inline int rep_clz(rep_t a) {
 #if defined __LP64__
     return __builtin_clzl(a);
 #else
@@ -83,7 +83,7 @@ static inline int rep_clz(rep_t a) {
 // 64x64 -> 128 wide multiply for platforms that don't have such an operation;
 // many 64-bit platforms have this operation, but they tend to have hardware
 // floating-point, so we don't bother with a special case for them here.
-static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
     // Each of the component 32x32 -> 64 products
     const uint64_t plolo = loWord(a) * loWord(b);
     const uint64_t plohi = loWord(a) * hiWord(b);
@@ -112,7 +112,7 @@ typedef long double fp_t;
 // 128-bit integer, we let the constant be casted to 128-bit integer
 #define significandBits 112
 
-static inline int rep_clz(rep_t a) {
+static __inline int rep_clz(rep_t a) {
     const union
         {
              __uint128_t ll;
@@ -148,7 +148,7 @@ static inline int rep_clz(rep_t a) {
 // 128x128 -> 256 wide multiply for platforms that don't have such an operation;
 // many 64-bit platforms have this operation, but they tend to have hardware
 // floating-point, so we don't bother with a special case for them here.
-static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 
     const uint64_t product11 = Word_1(a) * Word_1(b);
     const uint64_t product12 = Word_1(a) * Word_2(b);
@@ -228,28 +228,28 @@ static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 #define quietBit        (implicitBit >> 1)
 #define qnanRep         (exponentMask | quietBit)
 
-static inline rep_t toRep(fp_t x) {
+static __inline rep_t toRep(fp_t x) {
     const union { fp_t f; rep_t i; } rep = {.f = x};
     return rep.i;
 }
 
-static inline fp_t fromRep(rep_t x) {
+static __inline fp_t fromRep(rep_t x) {
     const union { fp_t f; rep_t i; } rep = {.i = x};
     return rep.f;
 }
 
-static inline int normalize(rep_t *significand) {
+static __inline int normalize(rep_t *significand) {
     const int shift = rep_clz(*significand) - rep_clz(implicitBit);
     *significand <<= shift;
     return 1 - shift;
 }
 
-static inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
     *hi = *hi << count | *lo >> (typeWidth - count);
     *lo = *lo << count;
 }
 
-static inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
+static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
     if (count < typeWidth) {
         const bool sticky = *lo << (typeWidth - count);
         *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
diff --git a/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc b/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc
index ca8a0bb..b34aa1b 100644
--- a/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc
+++ b/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fp_t __mulXf3__(fp_t a, fp_t b) {
+static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
     const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
     const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
     const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
diff --git a/contrib/compiler-rt/lib/builtins/fp_trunc.h b/contrib/compiler-rt/lib/builtins/fp_trunc.h
index 373ba1b..d5e79bb 100644
--- a/contrib/compiler-rt/lib/builtins/fp_trunc.h
+++ b/contrib/compiler-rt/lib/builtins/fp_trunc.h
@@ -63,12 +63,12 @@ static const int dstSigBits = 10;
 // End of specialization parameters.  Two helper routines for conversion to and
 // from the representation of floating-point data as integer values follow.
 
-static inline src_rep_t srcToRep(src_t x) {
+static __inline src_rep_t srcToRep(src_t x) {
     const union { src_t f; src_rep_t i; } rep = {.f = x};
     return rep.i;
 }
 
-static inline dst_t dstFromRep(dst_rep_t x) {
+static __inline dst_t dstFromRep(dst_rep_t x) {
     const union { dst_t f; dst_rep_t i; } rep = {.i = x};
     return rep.f;
 }
diff --git a/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc b/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc
index 372e8d6..d88ae06 100644
--- a/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc
+++ b/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc
@@ -39,7 +39,7 @@
 
 #include "fp_trunc.h"
 
-static inline dst_t __truncXfYf2__(src_t a) {
+static __inline dst_t __truncXfYf2__(src_t a) {
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
     const int srcBits = sizeof(src_t)*CHAR_BIT;
diff --git a/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c b/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c
index 4b95cfd..ed544d3 100644
--- a/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c
+++ b/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c
@@ -141,7 +141,8 @@ static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding)
  * throw through a C function compiled with -fexceptions.
  */
 #if __USING_SJLJ_EXCEPTIONS__
-// the setjump-longjump based exceptions personality routine has a different name
+/* the setjump-longjump based exceptions personality routine has a
+ * different name */
 COMPILER_RT_ABI _Unwind_Reason_Code
 __gcc_personality_sj0(int version, _Unwind_Action actions,
          uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
@@ -194,15 +195,15 @@ __gcc_personality_v0(int version, _Unwind_Action actions,
              * Set Instruction Pointer to so we re-enter function 
              * at landing pad. The landing pad is created by the compiler
              * to take two parameters in registers.
-	     */
-            _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), 
-                                                (uintptr_t)exceptionObject);
+             */
+            _Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
+                          (uintptr_t)exceptionObject);
             _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
-            _Unwind_SetIP(context, funcStart+landingPad);
+            _Unwind_SetIP(context, (funcStart + landingPad));
             return _URC_INSTALL_CONTEXT;
         }
     }
-    
+
     /* No landing pad found, continue unwinding. */
     return _URC_CONTINUE_UNWIND;
 }
diff --git a/contrib/compiler-rt/lib/builtins/i386/chkstk.S b/contrib/compiler-rt/lib/builtins/i386/chkstk.S
index 3733d72..b599748 100644
--- a/contrib/compiler-rt/lib/builtins/i386/chkstk.S
+++ b/contrib/compiler-rt/lib/builtins/i386/chkstk.S
@@ -19,13 +19,13 @@ DEFINE_COMPILERRT_FUNCTION(__chkstk_ms)
         jb     1f
 2:
         sub    $0x1000,%ecx
-        orl    $0,(%ecx)
+        test   %ecx,(%ecx)
         sub    $0x1000,%eax
         cmp    $0x1000,%eax
         ja     2b
 1:
         sub    %eax,%ecx
-        orl    $0,(%ecx)
+        test   %ecx,(%ecx)
         pop    %eax
         pop    %ecx
         ret
diff --git a/contrib/compiler-rt/lib/builtins/i386/chkstk2.S b/contrib/compiler-rt/lib/builtins/i386/chkstk2.S
new file mode 100644
index 0000000..7d65bb0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/chkstk2.S
@@ -0,0 +1,40 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __i386__
+
+// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
+// then decrement %esp by %eax.  Preserves all registers except %esp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+        push   %ecx
+        cmp    $0x1000,%eax
+        lea    8(%esp),%ecx     // esp before calling this routine -> ecx
+        jb     1f
+2:
+        sub    $0x1000,%ecx
+        test   %ecx,(%ecx)
+        sub    $0x1000,%eax
+        cmp    $0x1000,%eax
+        ja     2b
+1:
+        sub    %eax,%ecx
+        test   %ecx,(%ecx)
+
+        lea    4(%esp),%eax     // load pointer to the return address into eax
+        mov    %ecx,%esp        // install the new top of stack pointer into esp
+        mov    -4(%eax),%ecx    // restore ecx
+        push   (%eax)           // push return address onto the stack
+        sub    %esp,%eax        // restore the original value in eax
+        ret
+END_COMPILERRT_FUNCTION(__chkstk)
+END_COMPILERRT_FUNCTION(_alloca)
+
+#endif // __i386__
diff --git a/contrib/compiler-rt/lib/builtins/int_lib.h b/contrib/compiler-rt/lib/builtins/int_lib.h
index 985534d..0fb03ff 100644
--- a/contrib/compiler-rt/lib/builtins/int_lib.h
+++ b/contrib/compiler-rt/lib/builtins/int_lib.h
@@ -20,6 +20,13 @@
 /* Assumption: Right shift of signed negative is arithmetic shift. */
 /* Assumption: Endianness is little or big (not mixed). */
 
+#if defined(__ELF__)
+#define FNALIAS(alias_name, original_name) \
+  void alias_name() __attribute__((alias(#original_name)))
+#else
+#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
+#endif
+
 /* ABI macro definitions */
 
 #if __ARM_EABI__
@@ -36,13 +43,25 @@
 
 #else
 # define ARM_EABI_FNALIAS(aeabi_name, name)
-# if defined(__arm__) && defined(_WIN32)
+# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__))
 #   define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
 # else
 #   define COMPILER_RT_ABI
 # endif
 #endif
 
+#ifdef _MSC_VER
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#define NORETURN __declspec(noreturn)
+#define UNUSED
+#else
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#endif
+
 #if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE))
 /*
  * Kernel and boot environment can't use normal headers,
@@ -101,4 +120,44 @@ COMPILER_RT_ABI si_int __clzti2(ti_int a);
 COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
 #endif
 
+/* Definitions for builtins unavailable on MSVC */
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+
+uint32_t __inline __builtin_ctz(uint32_t value) {
+  uint32_t trailing_zero = 0;
+  if (_BitScanForward(&trailing_zero, value))
+    return trailing_zero;
+  return 32;
+}
+
+uint32_t __inline __builtin_clz(uint32_t value) {
+  uint32_t leading_zero = 0;
+  if (_BitScanReverse(&leading_zero, value))
+    return 31 - leading_zero;
+  return 32;
+}
+
+#if defined(_M_ARM) || defined(_M_X64)
+uint32_t __inline __builtin_clzll(uint64_t value) {
+  uint32_t leading_zero = 0;
+  if (_BitScanReverse64(&leading_zero, value))
+    return 63 - leading_zero;
+  return 64;
+}
+#else
+uint32_t __inline __builtin_clzll(uint64_t value) {
+  if (value == 0)
+    return 64;
+  uint32_t msh = (uint32_t)(value >> 32);
+  uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
+  if (msh != 0)
+    return __builtin_clz(msh);
+  return 32 + __builtin_clz(lsh);
+}
+#endif
+
+#define __builtin_clzl __builtin_clzll
+#endif /* defined(_MSC_VER) && !defined(__clang__) */
+
 #endif /* INT_LIB_H */
diff --git a/contrib/compiler-rt/lib/builtins/int_math.h b/contrib/compiler-rt/lib/builtins/int_math.h
index d6b4bda..fc81fb7 100644
--- a/contrib/compiler-rt/lib/builtins/int_math.h
+++ b/contrib/compiler-rt/lib/builtins/int_math.h
@@ -25,43 +25,90 @@
 #  define  __has_builtin(x) 0
 #endif
 
-#define CRT_INFINITY __builtin_huge_valf()
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <math.h>
+#include <stdlib.h>
+#include <ymath.h>
+#endif
 
-#define crt_isinf(x) __builtin_isinf((x))
-#define crt_isnan(x) __builtin_isnan((x))
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CRT_INFINITY INFINITY
+#else
+#define CRT_INFINITY __builtin_huge_valf()
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_isfinite(x) _finite((x))
+#define crt_isinf(x) !_finite((x))
+#define crt_isnan(x) _isnan((x))
+#else
 /* Define crt_isfinite in terms of the builtin if available, otherwise provide
  * an alternate version in terms of our other functions. This supports some
  * versions of GCC which didn't have __builtin_isfinite.
  */
 #if __has_builtin(__builtin_isfinite)
 #  define crt_isfinite(x) __builtin_isfinite((x))
-#else
+#elif defined(__GNUC__)
 #  define crt_isfinite(x) \
   __extension__(({ \
       __typeof((x)) x_ = (x); \
       !crt_isinf(x_) && !crt_isnan(x_); \
     }))
-#endif
+#else
+#  error "Do not know how to check for infinity"
+#endif /* __has_builtin(__builtin_isfinite) */
+#define crt_isinf(x) __builtin_isinf((x))
+#define crt_isnan(x) __builtin_isnan((x))
+#endif /* _MSC_VER */
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_copysign(x, y) copysign((x), (y))
+#define crt_copysignf(x, y) copysignf((x), (y))
+#define crt_copysignl(x, y) copysignl((x), (y))
+#else
 #define crt_copysign(x, y) __builtin_copysign((x), (y))
 #define crt_copysignf(x, y) __builtin_copysignf((x), (y))
 #define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fabs(x) fabs((x))
+#define crt_fabsf(x) fabsf((x))
+#define crt_fabsl(x) fabs((x))
+#else
 #define crt_fabs(x) __builtin_fabs((x))
 #define crt_fabsf(x) __builtin_fabsf((x))
 #define crt_fabsl(x) __builtin_fabsl((x))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fmax(x, y) __max((x), (y))
+#define crt_fmaxf(x, y) __max((x), (y))
+#define crt_fmaxl(x, y) __max((x), (y))
+#else
 #define crt_fmax(x, y) __builtin_fmax((x), (y))
 #define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
 #define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_logb(x) logb((x))
+#define crt_logbf(x) logbf((x))
+#define crt_logbl(x) logbl((x))
+#else
 #define crt_logb(x) __builtin_logb((x))
 #define crt_logbf(x) __builtin_logbf((x))
 #define crt_logbl(x) __builtin_logbl((x))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_scalbn(x, y) scalbn((x), (y))
+#define crt_scalbnf(x, y) scalbnf((x), (y))
+#define crt_scalbnl(x, y) scalbnl((x), (y))
+#else
 #define crt_scalbn(x, y) __builtin_scalbn((x), (y))
 #define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
 #define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
+#endif
 
 #endif /* INT_MATH_H */
diff --git a/contrib/compiler-rt/lib/builtins/int_types.h b/contrib/compiler-rt/lib/builtins/int_types.h
index aedae14..660385e 100644
--- a/contrib/compiler-rt/lib/builtins/int_types.h
+++ b/contrib/compiler-rt/lib/builtins/int_types.h
@@ -20,6 +20,10 @@
 
 #include "int_endianness.h"
 
+/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */
+#ifdef si_int
+#undef si_int
+#endif
 typedef      int si_int;
 typedef unsigned su_int;
 
@@ -57,7 +61,8 @@ typedef union
 } udwords;
 
 /* MIPS64 issue: PR 20098 */
-#if defined(__LP64__) && !(defined(__mips__) && defined(__clang__))
+#if (defined(__LP64__) || defined(__wasm__)) && \
+    !(defined(__mips__) && defined(__clang__))
 #define CRT_HAS_128BIT
 #endif
 
@@ -95,14 +100,14 @@ typedef union
     }s;
 } utwords;
 
-static inline ti_int make_ti(di_int h, di_int l) {
+static __inline ti_int make_ti(di_int h, di_int l) {
     twords r;
     r.s.high = h;
     r.s.low = l;
     return r.all;
 }
 
-static inline tu_int make_tu(du_int h, du_int l) {
+static __inline tu_int make_tu(du_int h, du_int l) {
     utwords r;
     r.s.high = h;
     r.s.low = l;
@@ -140,5 +145,22 @@ typedef union
     long double f;
 } long_double_bits;
 
+#if __STDC_VERSION__ >= 199901L
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+#else
+typedef struct { float real, imaginary; } Fcomplex;
+
+typedef struct { double real, imaginary; } Dcomplex;
+
+typedef struct { long double real, imaginary; } Lcomplex;
+
+#define COMPLEX_REAL(x) (x).real
+#define COMPLEX_IMAGINARY(x) (x).imaginary
+#endif
 #endif /* INT_TYPES_H */
 
diff --git a/contrib/compiler-rt/lib/builtins/int_util.c b/contrib/compiler-rt/lib/builtins/int_util.c
index 323e461..420d1e2 100644
--- a/contrib/compiler-rt/lib/builtins/int_util.c
+++ b/contrib/compiler-rt/lib/builtins/int_util.c
@@ -8,8 +8,8 @@
  * ===----------------------------------------------------------------------===
  */
 
-#include "int_util.h"
 #include "int_lib.h"
+#include "int_util.h"
 
 /* NOTE: The definitions in this file are declared weak because we clients to be
  * able to arbitrarily package individual functions into separate .a files. If
@@ -23,7 +23,7 @@
 
 #ifdef KERNEL_USE
 
-extern void panic(const char *, ...) __attribute__((noreturn));
+NORETURN extern void panic(const char *, ...);
 #ifndef _WIN32
 __attribute__((visibility("hidden")))
 #endif
@@ -34,8 +34,8 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) {
 #elif __APPLE__
 
 /* from libSystem.dylib */
-extern void __assert_rtn(const char *func, const char *file, 
-                     int line, const char * message) __attribute__((noreturn));
+NORETURN extern void __assert_rtn(const char *func, const char *file, int line,
+                                  const char *message);
 
 #ifndef _WIN32
 __attribute__((weak))
diff --git a/contrib/compiler-rt/lib/builtins/int_util.h b/contrib/compiler-rt/lib/builtins/int_util.h
index a9b595d..a7b20ed 100644
--- a/contrib/compiler-rt/lib/builtins/int_util.h
+++ b/contrib/compiler-rt/lib/builtins/int_util.h
@@ -20,10 +20,14 @@
 #define INT_UTIL_H
 
 /** \brief Trigger a program abort (or panic for kernel code). */
-#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, \
-                                                 __func__)
+#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__)
 
-void compilerrt_abort_impl(const char *file, int line,
-                           const char *function) __attribute__((noreturn));
+NORETURN void compilerrt_abort_impl(const char *file, int line,
+                                    const char *function);
+
+#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
+#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
+#define COMPILE_TIME_ASSERT2(expr, cnt)                                        \
+  typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
 
 #endif /* INT_UTIL_H */
diff --git a/contrib/compiler-rt/lib/builtins/muldc3.c b/contrib/compiler-rt/lib/builtins/muldc3.c
index 3bfae2c..16d8e98 100644
--- a/contrib/compiler-rt/lib/builtins/muldc3.c
+++ b/contrib/compiler-rt/lib/builtins/muldc3.c
@@ -17,17 +17,17 @@
 
 /* Returns: the product of a + ib and c + id */
 
-COMPILER_RT_ABI double _Complex
+COMPILER_RT_ABI Dcomplex
 __muldc3(double __a, double __b, double __c, double __d)
 {
     double __ac = __a * __c;
     double __bd = __b * __d;
     double __ad = __a * __d;
     double __bc = __b * __c;
-    double _Complex z;
-    __real__ z = __ac - __bd;
-    __imag__ z = __ad + __bc;
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Dcomplex z;
+    COMPLEX_REAL(z) = __ac - __bd;
+    COMPLEX_IMAGINARY(z) = __ad + __bc;
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         int __recalc = 0;
         if (crt_isinf(__a) || crt_isinf(__b))
@@ -65,8 +65,8 @@ __muldc3(double __a, double __b, double __c, double __d)
         }
         if (__recalc)
         {
-            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
-            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/contrib/compiler-rt/lib/builtins/mulsc3.c b/contrib/compiler-rt/lib/builtins/mulsc3.c
index 29d46c6..c89cfd2 100644
--- a/contrib/compiler-rt/lib/builtins/mulsc3.c
+++ b/contrib/compiler-rt/lib/builtins/mulsc3.c
@@ -17,17 +17,17 @@
 
 /* Returns: the product of a + ib and c + id */
 
-COMPILER_RT_ABI float _Complex
+COMPILER_RT_ABI Fcomplex
 __mulsc3(float __a, float __b, float __c, float __d)
 {
     float __ac = __a * __c;
     float __bd = __b * __d;
     float __ad = __a * __d;
     float __bc = __b * __c;
-    float _Complex z;
-    __real__ z = __ac - __bd;
-    __imag__ z = __ad + __bc;
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Fcomplex z;
+    COMPLEX_REAL(z) = __ac - __bd;
+    COMPLEX_IMAGINARY(z) = __ad + __bc;
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         int __recalc = 0;
         if (crt_isinf(__a) || crt_isinf(__b))
@@ -65,8 +65,8 @@ __mulsc3(float __a, float __b, float __c, float __d)
         }
         if (__recalc)
         {
-            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
-            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/contrib/compiler-rt/lib/builtins/mulxc3.c b/contrib/compiler-rt/lib/builtins/mulxc3.c
index 161fd0c..ba32216 100644
--- a/contrib/compiler-rt/lib/builtins/mulxc3.c
+++ b/contrib/compiler-rt/lib/builtins/mulxc3.c
@@ -19,17 +19,17 @@
 
 /* Returns: the product of a + ib and c + id */
 
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
 __mulxc3(long double __a, long double __b, long double __c, long double __d)
 {
     long double __ac = __a * __c;
     long double __bd = __b * __d;
     long double __ad = __a * __d;
     long double __bc = __b * __c;
-    long double _Complex z;
-    __real__ z = __ac - __bd;
-    __imag__ z = __ad + __bc;
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = __ac - __bd;
+    COMPLEX_IMAGINARY(z) = __ad + __bc;
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         int __recalc = 0;
         if (crt_isinf(__a) || crt_isinf(__b))
@@ -67,8 +67,8 @@ __mulxc3(long double __a, long double __b, long double __c, long double __d)
         }
         if (__recalc)
         {
-            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
-            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/contrib/compiler-rt/lib/builtins/ppc/DD.h b/contrib/compiler-rt/lib/builtins/ppc/DD.h
index fc3e41c..3e5f9e5 100644
--- a/contrib/compiler-rt/lib/builtins/ppc/DD.h
+++ b/contrib/compiler-rt/lib/builtins/ppc/DD.h
@@ -1,5 +1,5 @@
-#ifndef __DD_HEADER
-#define __DD_HEADER
+#ifndef COMPILERRT_DD_HEADER
+#define COMPILERRT_DD_HEADER
 
 #include "../int_lib.h"
 
@@ -9,7 +9,7 @@ typedef union {
 		double hi;
 		double lo;
 	}s;
-}DD;
+} DD;
 
 typedef union { 
 	double d;
@@ -19,28 +19,27 @@ typedef union {
 #define LOWORDER(xy,xHi,xLo,yHi,yLo) \
 	(((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo))
 
-static inline double __attribute__((always_inline))
-local_fabs(double x)
-{
-	doublebits result = { .d = x };
-	result.x &= UINT64_C(0x7fffffffffffffff);
-	return result.d;
+static __inline ALWAYS_INLINE double local_fabs(double x) {
+  doublebits result = {.d = x};
+  result.x &= UINT64_C(0x7fffffffffffffff);
+  return result.d;
 }
 
-static inline double __attribute__((always_inline))
-high26bits(double x)
-{
-	doublebits result = { .d = x };
-	result.x &= UINT64_C(0xfffffffff8000000);
-	return result.d;
+static __inline ALWAYS_INLINE double high26bits(double x) {
+  doublebits result = {.d = x};
+  result.x &= UINT64_C(0xfffffffff8000000);
+  return result.d;
 }
 
-static inline int __attribute__((always_inline))
-different_sign(double x, double y)
-{
-	doublebits xsignbit = { .d = x }, ysignbit = { .d = y };
-	int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
-	return result;
+static __inline ALWAYS_INLINE int different_sign(double x, double y) {
+  doublebits xsignbit = {.d = x}, ysignbit = {.d = y};
+  int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
+  return result;
 }
 
-#endif /* __DD_HEADER */
+long double __gcc_qadd(long double, long double);
+long double __gcc_qsub(long double, long double);
+long double __gcc_qmul(long double, long double);
+long double __gcc_qdiv(long double, long double);
+
+#endif /* COMPILERRT_DD_HEADER */
diff --git a/contrib/compiler-rt/lib/builtins/ppc/divtc3.c b/contrib/compiler-rt/lib/builtins/ppc/divtc3.c
index 2991281..8ec41c5 100644
--- a/contrib/compiler-rt/lib/builtins/ppc/divtc3.c
+++ b/contrib/compiler-rt/lib/builtins/ppc/divtc3.c
@@ -14,11 +14,6 @@
     (x).s.lo = 0.0;                                                     \
   }
 
-long double __gcc_qadd(long double, long double);
-long double __gcc_qsub(long double, long double);
-long double __gcc_qmul(long double, long double);
-long double __gcc_qdiv(long double, long double);
-
 long double _Complex
 __divtc3(long double a, long double b, long double c, long double d)
 {
diff --git a/contrib/compiler-rt/lib/builtins/ppc/multc3.c b/contrib/compiler-rt/lib/builtins/ppc/multc3.c
index 738b65a..9dd79c9 100644
--- a/contrib/compiler-rt/lib/builtins/ppc/multc3.c
+++ b/contrib/compiler-rt/lib/builtins/ppc/multc3.c
@@ -17,10 +17,6 @@
     }                                                                   \
   }
 
-long double __gcc_qadd(long double, long double);
-long double __gcc_qsub(long double, long double);
-long double __gcc_qmul(long double, long double);
-
 long double _Complex
 __multc3(long double a, long double b, long double c, long double d)
 {
diff --git a/contrib/compiler-rt/lib/builtins/subdf3.c b/contrib/compiler-rt/lib/builtins/subdf3.c
index 089e062..7a79e5e 100644
--- a/contrib/compiler-rt/lib/builtins/subdf3.c
+++ b/contrib/compiler-rt/lib/builtins/subdf3.c
@@ -23,4 +23,3 @@ __subdf3(fp_t a, fp_t b) {
     return __adddf3(a, fromRep(toRep(b) ^ signBit));
 }
 
-/* FIXME: rsub for ARM EABI */
diff --git a/contrib/compiler-rt/lib/builtins/subsf3.c b/contrib/compiler-rt/lib/builtins/subsf3.c
index 47f5e5e..c3b8514 100644
--- a/contrib/compiler-rt/lib/builtins/subsf3.c
+++ b/contrib/compiler-rt/lib/builtins/subsf3.c
@@ -23,4 +23,3 @@ __subsf3(fp_t a, fp_t b) {
     return __addsf3(a, fromRep(toRep(b) ^ signBit));
 }
 
-/* FIXME: rsub for ARM EABI */
diff --git a/contrib/compiler-rt/lib/builtins/truncdfhf2.c b/contrib/compiler-rt/lib/builtins/truncdfhf2.c
index 0852df3..17195cd 100644
--- a/contrib/compiler-rt/lib/builtins/truncdfhf2.c
+++ b/contrib/compiler-rt/lib/builtins/truncdfhf2.c
@@ -11,6 +11,8 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
+ARM_EABI_FNALIAS(d2h, truncdfhf2)
+
 COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
     return __truncXfYf2__(a);
 }
diff --git a/contrib/compiler-rt/lib/builtins/truncsfhf2.c b/contrib/compiler-rt/lib/builtins/truncsfhf2.c
index 381e590..9d61895 100644
--- a/contrib/compiler-rt/lib/builtins/truncsfhf2.c
+++ b/contrib/compiler-rt/lib/builtins/truncsfhf2.c
@@ -11,9 +11,11 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
+ARM_EABI_FNALIAS(f2h, truncsfhf2)
+
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI __attribute__((noinline)) uint16_t __truncsfhf2(float a) {
+COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
     return __truncXfYf2__(a);
 }
 
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S b/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S
index 5759e84..4149ac6 100644
--- a/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S
+++ b/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S
@@ -24,13 +24,13 @@ DEFINE_COMPILERRT_FUNCTION(___chkstk_ms)
         jb     1f
 2:
         sub    $0x1000,%rcx
-        orl    $0,(%rcx)
+        test   %rcx,(%rcx)
         sub    $0x1000,%rax
         cmp    $0x1000,%rax
         ja     2b
 1:
         sub    %rax,%rcx
-        orl    $0,(%rcx)
+        test   %rcx,(%rcx)
         pop    %rax
         pop    %rcx
         ret
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S b/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S
new file mode 100644
index 0000000..ac1eb92
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S
@@ -0,0 +1,42 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __x86_64__
+
+// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments,
+// then decrement %rsp by %rax.  Preserves all registers except %rsp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__alloca)
+        mov    %rcx,%rax        // x64 _alloca is a normal function with parameter in rcx
+        // fallthrough
+DEFINE_COMPILERRT_FUNCTION(___chkstk)
+        push   %rcx
+        cmp    $0x1000,%rax
+        lea    16(%rsp),%rcx     // rsp before calling this routine -> rcx
+        jb     1f
+2:
+        sub    $0x1000,%rcx
+        test   %rcx,(%rcx)
+        sub    $0x1000,%rax
+        cmp    $0x1000,%rax
+        ja     2b
+1:
+        sub    %rax,%rcx
+        test   %rcx,(%rcx)
+
+        lea    8(%rsp),%rax     // load pointer to the return address into rax
+        mov    %rcx,%rsp        // install the new top of stack pointer into rsp
+        mov    -8(%rax),%rcx    // restore rcx
+        push   (%rax)           // push return address onto the stack
+        sub    %rsp,%rax        // restore the original value in rax
+        ret
+END_COMPILERRT_FUNCTION(___chkstk)
+END_COMPILERRT_FUNCTION(__alloca)
+
+#endif // __x86_64__