Diffstat (limited to 'test/Transforms')
-rw-r--r-- | test/Transforms/BBVectorize/no-ldstr-conn.ll | 23
-rw-r--r-- | test/Transforms/BBVectorize/simple-ldstr-ptrs.ll | 81
-rw-r--r-- | test/Transforms/BBVectorize/simple-sel.ll | 30
-rw-r--r-- | test/Transforms/GlobalOpt/constantfold-initializers.ll | 5
-rw-r--r-- | test/Transforms/InstCombine/2012-04-30-SRem.ll | 12
-rw-r--r-- | test/Transforms/InstCombine/apint-shift.ll | 63
-rw-r--r-- | test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll | 2
-rw-r--r-- | test/Transforms/LoopStrengthReduce/pr12691.ll | 34
-rw-r--r-- | test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll | 101
-rw-r--r-- | test/Transforms/ObjCARC/escape.ll | 131
-rw-r--r-- | test/Transforms/Reassociate/pr12245.ll | 50
-rw-r--r-- | test/Transforms/SimplifyLibCalls/floor.ll | 41
-rw-r--r-- | test/Transforms/SimplifyLibCalls/win-math.ll | 275
13 files changed, 834 insertions, 14 deletions
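The files below are FileCheck-driven lit tests. As a rough sketch of how one of the new tests is exercised outside the lit harness (assuming an LLVM build of this vintage with opt and FileCheck on PATH, and paths relative to the source tree), its RUN line expands to roughly:

    # run the BBVectorize pipeline named in the test's RUN line and verify the CHECK lines
    opt < test/Transforms/BBVectorize/no-ldstr-conn.ll \
        -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S \
      | FileCheck test/Transforms/BBVectorize/no-ldstr-conn.ll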
diff --git a/test/Transforms/BBVectorize/no-ldstr-conn.ll b/test/Transforms/BBVectorize/no-ldstr-conn.ll new file mode 100644 index 0000000..ada2a71 --- /dev/null +++ b/test/Transforms/BBVectorize/no-ldstr-conn.ll @@ -0,0 +1,23 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s + +; Make sure that things (specifically getelementptr) are not connected to loads +; and stores via the address operand (which would be bad because the address +; is really a scalar even after vectorization) +define i64 @test2(i64 %a) nounwind uwtable readonly { +entry: + %a1 = inttoptr i64 %a to i64* + %a2 = getelementptr i64* %a1, i64 1 + %a3 = getelementptr i64* %a1, i64 2 + %v2 = load i64* %a2, align 8 + %v3 = load i64* %a3, align 8 + %v2a = add i64 %v2, 5 + %v3a = add i64 %v3, 7 + store i64 %v2a, i64* %a2, align 8 + store i64 %v3a, i64* %a3, align 8 + %r = add i64 %v2, %v3 + ret i64 %r +; CHECK: @test2 +; CHECK-NOT: getelementptr <2 x i64*> +} + diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll new file mode 100644 index 0000000..f992d41 --- /dev/null +++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll @@ -0,0 +1,81 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO + +; Simple 3-pair chain also with loads and stores (using ptrs and gep) +define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly { +entry: + %i0 = load i64* %a, align 8 + %i1 = load i64* %b, align 8 + %mul = mul i64 %i0, %i1 + %arrayidx3 = getelementptr inbounds i64* %a, i64 1 + %i3 = load i64* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds i64* %b, i64 1 + %i4 = load i64* %arrayidx4, align 8 + %mul5 = mul i64 %i3, %i4 + %ptr = inttoptr i64 %mul to double* + %ptr5 = inttoptr i64 %mul5 to double* + %aptr = getelementptr inbounds double* %ptr, i64 2 + %aptr5 = getelementptr inbounds double* %ptr5, i64 3 + %av = load double* %aptr, align 16 + %av5 = load double* %aptr5, align 16 + %r = fmul double %av, %av5 + store i64 %mul, i64* %c, align 8 + %arrayidx5 = getelementptr inbounds i64* %c, i64 1 + store i64 %mul5, i64* %arrayidx5, align 8 + ret double %r +; CHECK: @test1 +; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>* +; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>* +; CHECK: %i0 = load <2 x i64>* %i0.v.i0, align 8 +; CHECK: %i1 = load <2 x i64>* %i1.v.i0, align 8 +; CHECK: %mul = mul <2 x i64> %i0, %i1 +; CHECK: %ptr = inttoptr <2 x i64> %mul to <2 x double*> +; CHECK: %aptr = getelementptr inbounds <2 x double*> %ptr, <2 x i64> <i64 2, i64 3> +; CHECK: %aptr.v.r1 = extractelement <2 x double*> %aptr, i32 0 +; CHECK: %aptr.v.r2 = extractelement <2 x double*> %aptr, i32 1 +; CHECK: %av = load double* %aptr.v.r1, align 16 +; CHECK: %av5 = load double* %aptr.v.r2, align 16 +; CHECK: %r = fmul double %av, %av5 +; CHECK: %0 = bitcast i64* %c to <2 x i64>* +; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8 +; CHECK: ret double %r +; CHECK-AO: @test1 +; CHECK-AO-NOT: load <2 x 
+} + +; Simple 3-pair chain with loads and stores (using ptrs and gep) +define void @test2(i64** %a, i64** %b, i64** %c) nounwind uwtable readonly { +entry: + %i0 = load i64** %a, align 8 + %i1 = load i64** %b, align 8 + %arrayidx3 = getelementptr inbounds i64** %a, i64 1 + %i3 = load i64** %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds i64** %b, i64 1 + %i4 = load i64** %arrayidx4, align 8 + %o1 = load i64* %i1, align 8 + %o4 = load i64* %i4, align 8 + %ptr0 = getelementptr inbounds i64* %i0, i64 %o1 + %ptr3 = getelementptr inbounds i64* %i3, i64 %o4 + store i64* %ptr0, i64** %c, align 8 + %arrayidx5 = getelementptr inbounds i64** %c, i64 1 + store i64* %ptr3, i64** %arrayidx5, align 8 + ret void +; CHECK: @test2 +; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>* +; CHECK: %i1 = load i64** %b, align 8 +; CHECK: %i0 = load <2 x i64*>* %i0.v.i0, align 8 +; CHECK: %arrayidx4 = getelementptr inbounds i64** %b, i64 1 +; CHECK: %i4 = load i64** %arrayidx4, align 8 +; CHECK: %o1 = load i64* %i1, align 8 +; CHECK: %o4 = load i64* %i4, align 8 +; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0 +; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1 +; CHECK: %ptr0 = getelementptr inbounds <2 x i64*> %i0, <2 x i64> %ptr0.v.i1.2 +; CHECK: %0 = bitcast i64** %c to <2 x i64*>* +; CHECK: store <2 x i64*> %ptr0, <2 x i64*>* %0, align 8 +; CHECK: ret void +; CHECK-AO: @test2 +; CHECK-AO-NOT: <2 x +} + diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll new file mode 100644 index 0000000..4daa571 --- /dev/null +++ b/test/Transforms/BBVectorize/simple-sel.ll @@ -0,0 +1,30 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s + +; Basic depth-3 chain with select +define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) { +; CHECK: @test1 +; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 + %Z1 = select i1 %C1, double %Y1, double %B1 + %Z2 = select i1 %C2, double %Y2, double %B2 +; CHECK: %Z1.v.i0.1 = insertelement <2 x i1> undef, i1 %C1, i32 0 +; CHECK: %Z1.v.i0.2 = insertelement <2 x i1> %Z1.v.i0.1, i1 %C2, i32 1 +; CHECK: %Z1 = select <2 x i1> %Z1.v.i0.2, <2 x double> %Y1, <2 x double> %X1.v.i1.2 + %R = fmul double %Z1, %Z2 +; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 +; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 +; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 + ret double %R +; CHECK: ret double %R +} + + diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll index af8fa32..ce6e2c4 100644 --- a/test/Transforms/GlobalOpt/constantfold-initializers.ll +++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll @@ -12,6 +12,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @xs = 
global [2 x i32] zeroinitializer, align 4 ; CHECK: @xs = global [2 x i32] [i32 1, i32 1] +; PR12642 +%PR12642.struct = type { i8 } +@PR12642.s = global <{}> zeroinitializer, align 1 +@PR12642.p = constant %PR12642.struct* bitcast (i8* getelementptr (i8* bitcast (<{}>* @PR12642.s to i8*), i64 1) to %PR12642.struct*), align 8 + define internal void @test1() { entry: store i32 1, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 0) diff --git a/test/Transforms/InstCombine/2012-04-30-SRem.ll b/test/Transforms/InstCombine/2012-04-30-SRem.ll new file mode 100644 index 0000000..a285d5a --- /dev/null +++ b/test/Transforms/InstCombine/2012-04-30-SRem.ll @@ -0,0 +1,12 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s +; PR12541 + +define i32 @foo(i32 %x) { + %y = xor i32 %x, 3 + %z = srem i32 1656690544, %y + %sext = shl i32 %z, 24 + %s = ashr exact i32 %sext, 24 + ret i32 %s +; CHECK-NOT: and +; The shifts were wrongly being turned into an and with 112 +} diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll index 55243a6..0ea73a0 100644 --- a/test/Transforms/InstCombine/apint-shift.ll +++ b/test/Transforms/InstCombine/apint-shift.ll @@ -1,70 +1,93 @@ -; This test makes sure that shit instructions are properly eliminated +; This test makes sure that shift instructions are properly eliminated ; even with arbitrary precision integers. -; RUN: opt < %s -instcombine -S | not grep sh -; END. +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: @test1 +; CHECK-NOT: sh define i47 @test1(i47 %A) { %B = shl i47 %A, 0 ; <i47> [#uses=1] ret i47 %B } +; CHECK: @test2 +; CHECK-NOT: sh define i41 @test2(i7 %X) { %A = zext i7 %X to i41 ; <i41> [#uses=1] %B = shl i41 0, %A ; <i41> [#uses=1] ret i41 %B } +; CHECK: @test3 +; CHECK-NOT: sh define i41 @test3(i41 %A) { %B = ashr i41 %A, 0 ; <i41> [#uses=1] ret i41 %B } +; CHECK: @test4 +; CHECK-NOT: sh define i39 @test4(i7 %X) { %A = zext i7 %X to i39 ; <i39> [#uses=1] %B = ashr i39 0, %A ; <i39> [#uses=1] ret i39 %B } +; CHECK: @test5 +; CHECK-NOT: sh define i55 @test5(i55 %A) { %B = lshr i55 %A, 55 ; <i55> [#uses=1] ret i55 %B } +; CHECK: @test5a +; CHECK-NOT: sh define i32 @test5a(i32 %A) { %B = shl i32 %A, 32 ; <i32> [#uses=1] ret i32 %B } +; CHECK: @test6 +; CHECK-NOT: sh define i55 @test6(i55 %A) { %B = shl i55 %A, 1 ; <i55> [#uses=1] %C = mul i55 %B, 3 ; <i55> [#uses=1] ret i55 %C } +; CHECK: @test7 +; CHECK-NOT: sh define i29 @test7(i8 %X) { %A = zext i8 %X to i29 ; <i29> [#uses=1] %B = ashr i29 -1, %A ; <i29> [#uses=1] ret i29 %B } +; CHECK: @test8 +; CHECK-NOT: sh define i7 @test8(i7 %A) { %B = shl i7 %A, 4 ; <i7> [#uses=1] %C = shl i7 %B, 3 ; <i7> [#uses=1] ret i7 %C } +; CHECK: @test9 +; CHECK-NOT: sh define i17 @test9(i17 %A) { %B = shl i17 %A, 16 ; <i17> [#uses=1] %C = lshr i17 %B, 16 ; <i17> [#uses=1] ret i17 %C } +; CHECK: @test10 +; CHECK-NOT: sh define i19 @test10(i19 %A) { %B = lshr i19 %A, 18 ; <i19> [#uses=1] %C = shl i19 %B, 18 ; <i19> [#uses=1] ret i19 %C } +; CHECK: @test11 +; CHECK-NOT: sh define i23 @test11(i23 %A) { %a = mul i23 %A, 3 ; <i23> [#uses=1] %B = lshr i23 %a, 11 ; <i23> [#uses=1] @@ -72,12 +95,16 @@ define i23 @test11(i23 %A) { ret i23 %C } +; CHECK: @test12 +; CHECK-NOT: sh define i47 @test12(i47 %A) { %B = ashr i47 %A, 8 ; <i47> [#uses=1] %C = shl i47 %B, 8 ; <i47> [#uses=1] ret i47 %C } +; CHECK: @test13 +; CHECK-NOT: sh define i18 @test13(i18 %A) { %a = mul i18 %A, 3 ; <i18> [#uses=1] %B = ashr i18 %a, 8 ; <i18> [#uses=1] @@ -85,6 +112,8 @@ define i18 @test13(i18 %A) { 
ret i18 %C } +; CHECK: @test14 +; CHECK-NOT: sh define i35 @test14(i35 %A) { %B = lshr i35 %A, 4 ; <i35> [#uses=1] %C = or i35 %B, 1234 ; <i35> [#uses=1] @@ -92,6 +121,8 @@ define i35 @test14(i35 %A) { ret i35 %D } +; CHECK: @test14a +; CHECK-NOT: sh define i79 @test14a(i79 %A) { %B = shl i79 %A, 4 ; <i79> [#uses=1] %C = and i79 %B, 1234 ; <i79> [#uses=1] @@ -99,12 +130,16 @@ define i79 @test14a(i79 %A) { ret i79 %D } +; CHECK: @test15 +; CHECK-NOT: sh define i45 @test15(i1 %C) { %A = select i1 %C, i45 3, i45 1 ; <i45> [#uses=1] %V = shl i45 %A, 2 ; <i45> [#uses=1] ret i45 %V } +; CHECK: @test15a +; CHECK-NOT: sh define i53 @test15a(i1 %X) { %A = select i1 %X, i8 3, i8 1 ; <i8> [#uses=1] %B = zext i8 %A to i53 ; <i53> [#uses=1] @@ -112,6 +147,8 @@ define i53 @test15a(i1 %X) { ret i53 %V } +; CHECK: @test16 +; CHECK-NOT: sh define i1 @test16(i84 %X) { %tmp.3 = ashr i84 %X, 4 ; <i84> [#uses=1] %tmp.6 = and i84 %tmp.3, 1 ; <i84> [#uses=1] @@ -119,48 +156,64 @@ define i1 @test16(i84 %X) { ret i1 %tmp.7 } +; CHECK: @test17 +; CHECK-NOT: sh define i1 @test17(i106 %A) { %B = lshr i106 %A, 3 ; <i106> [#uses=1] %C = icmp eq i106 %B, 1234 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test18 +; CHECK-NOT: sh define i1 @test18(i11 %A) { %B = lshr i11 %A, 10 ; <i11> [#uses=1] %C = icmp eq i11 %B, 123 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test19 +; CHECK-NOT: sh define i1 @test19(i37 %A) { %B = ashr i37 %A, 2 ; <i37> [#uses=1] %C = icmp eq i37 %B, 0 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test19a +; CHECK-NOT: sh define i1 @test19a(i39 %A) { %B = ashr i39 %A, 2 ; <i39> [#uses=1] %C = icmp eq i39 %B, -1 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test20 +; CHECK-NOT: sh define i1 @test20(i13 %A) { %B = ashr i13 %A, 12 ; <i13> [#uses=1] %C = icmp eq i13 %B, 123 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test21 +; CHECK-NOT: sh define i1 @test21(i12 %A) { %B = shl i12 %A, 6 ; <i12> [#uses=1] %C = icmp eq i12 %B, -128 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test22 +; CHECK-NOT: sh define i1 @test22(i14 %A) { %B = shl i14 %A, 7 ; <i14> [#uses=1] %C = icmp eq i14 %B, 0 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test23 +; CHECK-NOT: sh define i11 @test23(i44 %A) { %B = shl i44 %A, 33 ; <i44> [#uses=1] %C = ashr i44 %B, 33 ; <i44> [#uses=1] @@ -168,6 +221,8 @@ define i11 @test23(i44 %A) { ret i11 %D } +; CHECK: @test25 +; CHECK-NOT: sh define i37 @test25(i37 %tmp.2, i37 %AA) { %x = lshr i37 %AA, 17 ; <i37> [#uses=1] %tmp.3 = lshr i37 %tmp.2, 17 ; <i37> [#uses=1] @@ -176,6 +231,8 @@ define i37 @test25(i37 %tmp.2, i37 %AA) { ret i37 %tmp.6 } +; CHECK: @test26 +; CHECK-NOT: sh define i40 @test26(i40 %A) { %B = lshr i40 %A, 1 ; <i40> [#uses=1] %C = bitcast i40 %B to i40 ; <i40> [#uses=1] diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index 2dcaab8..ed32ca8 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -61,7 +61,7 @@ exit: ; preds = %cond.true29.i, %cond.true.i ; CHECK: @test2 ; CHECK: %entry ; CHECK-NOT: mov -; CHECK: jne +; CHECK: je define void @test2(i32 %n) nounwind uwtable { entry: br i1 undef, label %while.end, label %for.cond468 diff --git a/test/Transforms/LoopStrengthReduce/pr12691.ll b/test/Transforms/LoopStrengthReduce/pr12691.ll new file mode 100644 index 0000000..8399434 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/pr12691.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +@d = common global i32 0, 
align 4 + +define void @fn2(i32 %x) nounwind uwtable { +entry: + br label %for.cond + +for.cond: + %g.0 = phi i32 [ 0, %entry ], [ %dec, %for.cond ] + %tobool = icmp eq i32 %x, 0 + %dec = add nsw i32 %g.0, -1 + br i1 %tobool, label %for.cond, label %for.end + +for.end: +; CHECK: %tmp1 = load i32* @d, align 4 +; CHECK-NEXT: %tmp2 = load i32* @d, align 4 +; CHECK-NEXT: %0 = sub i32 %tmp1, %tmp2 + + %tmp1 = load i32* @d, align 4 + %add = add nsw i32 %tmp1, %g.0 + %tmp2 = load i32* @d, align 4 + %tobool26 = icmp eq i32 %x, 0 + br i1 %tobool26, label %for.end5, label %for.body.lr.ph + +for.body.lr.ph: + %tobool3 = icmp ne i32 %tmp2, %add + br label %for.end5 + +for.end5: + ret void +} + + diff --git a/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll b/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll new file mode 100644 index 0000000..261876d --- /dev/null +++ b/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll @@ -0,0 +1,101 @@ +; RUN: opt < %s -basicaa -instcombine -inline -functionattrs -licm -loop-unswitch -gvn -verify +; PR12573 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379 = type { %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 } +%class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376 = type { %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* } +%class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377* } +%class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 } + +define void @_Z23get_reconstruction_pathv() uwtable ssp { +entry: + %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8 + br label %for.cond + +for.cond: ; preds = %for.end, %entry + invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %for.cond + invoke void @_ZN1C3endEv() + to label %for.cond3 unwind label %lpad + +for.cond3: ; preds = %invoke.cont6, %invoke.cont + invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c) + to label %invoke.cont4 unwind label %lpad + +invoke.cont4: ; preds = %for.cond3 + invoke void @_ZN1C3endEv() + to label %invoke.cont6 unwind label %lpad + +invoke.cont6: ; preds = %invoke.cont4 + br i1 undef, label %for.cond3, label %for.end + +lpad: ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + resume { i8*, i32 } undef + +for.end: ; preds = %invoke.cont6 + invoke void @_ZN1C13_M_insert_auxER1D() + to label %for.cond unwind label %lpad +} + +define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 { +entry: + %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8 + store 
%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8, !tbaa !0 + %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr + %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0 + %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8, !tbaa !0 + %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null + br i1 %tobool, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + call void @_Z10__assert13v() noreturn + unreachable + +cond.end: ; preds = %entry + ret void +} + +declare i32 @__gxx_personality_v0(...) + +declare void @_ZN1C3endEv() + +define void @_ZN1C13_M_insert_auxER1D() uwtable ssp align 2 { +entry: + ret void +} + +define void @_ZN1DD1Ev() unnamed_addr uwtable inlinehint ssp align 2 { +entry: + ret void +} + +define void @_ZN1DD2Ev() unnamed_addr uwtable inlinehint ssp align 2 { +entry: + ret void +} + +define void @_ZN1BD1Ev() unnamed_addr uwtable ssp align 2 { +entry: + ret void +} + +define void @_ZN1BD2Ev() unnamed_addr uwtable ssp align 2 { +entry: + ret void +} + +define void @_ZN1BaSERS_() uwtable ssp align 2 { +entry: + unreachable +} + +declare void @_Z10__assert13v() noreturn + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll new file mode 100644 index 0000000..3f694cf --- /dev/null +++ b/test/Transforms/ObjCARC/escape.ll @@ -0,0 +1,131 @@ +; RUN: opt -objc-arc -S < %s | FileCheck %s +; rdar://11229925 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%struct.__block_byref_weakLogNTimes = type { i8*, %struct.__block_byref_weakLogNTimes*, i32, i32, i8*, i8*, void (...)* } +%struct.__block_descriptor = type { i64, i64 } + +; Don't optimize away the retainBlock, because the object's address "escapes" +; with the objc_storeWeak call. 
+ +; CHECK: define void @test0( +; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0 +; CHECK: call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0 +; CHECK: } +define void @test0() nounwind { +entry: + %weakLogNTimes = alloca %struct.__block_byref_weakLogNTimes, align 8 + %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 + %byref.isa = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 0 + store i8* null, i8** %byref.isa, align 8 + %byref.forwarding = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 1 + store %struct.__block_byref_weakLogNTimes* %weakLogNTimes, %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %byref.flags = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 2 + store i32 33554432, i32* %byref.flags, align 8 + %byref.size = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 3 + store i32 48, i32* %byref.size, align 4 + %tmp1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 4 + store i8* bitcast (void (i8*, i8*)* @__Block_byref_object_copy_ to i8*), i8** %tmp1, align 8 + %tmp2 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 5 + store i8* bitcast (void (i8*)* @__Block_byref_object_dispose_ to i8*), i8** %tmp2, align 8 + %weakLogNTimes1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 6 + %tmp3 = bitcast void (...)** %weakLogNTimes1 to i8** + %tmp4 = call i8* @objc_initWeak(i8** %tmp3, i8* null) nounwind + %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0 + store i8* null, i8** %block.isa, align 8 + %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1 + store i32 1107296256, i32* %block.flags, align 8 + %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2 + store i32 0, i32* %block.reserved, align 4 + %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3 + store i8* bitcast (void (i8*, i32)* @__main_block_invoke_0 to i8*), i8** %block.invoke, align 8 + %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4 + store %struct.__block_descriptor* null, %struct.__block_descriptor** %block.descriptor, align 8 + %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5 + %tmp5 = bitcast %struct.__block_byref_weakLogNTimes* %weakLogNTimes to i8* + store i8* %tmp5, i8** %block.captured, align 8 + %tmp6 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8* + %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0 + %tmp8 = load %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %weakLogNTimes3 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %tmp8, i64 0, i32 6 + %tmp9 = bitcast void (...)** %weakLogNTimes3 to i8** + %tmp10 = call i8* @objc_storeWeak(i8** %tmp9, i8* %tmp7) nounwind + %tmp11 = getelementptr inbounds i8* %tmp7, i64 16 + %tmp12 = bitcast i8* %tmp11 to i8** + %tmp13 = load i8** %tmp12, align 8 + %tmp14 = bitcast i8* 
%tmp13 to void (i8*, i32)* + call void %tmp14(i8* %tmp7, i32 10) nounwind, !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0 + call void @_Block_object_dispose(i8* %tmp5, i32 8) nounwind + call void @objc_destroyWeak(i8** %tmp3) nounwind + ret void +} + +; Like test0, but it makes a regular call instead of a storeWeak call, +; so the optimization is valid. + +; CHECK: define void @test1( +; CHECK-NOT: @objc_retainBlock +; CHECK: } +define void @test1() nounwind { +entry: + %weakLogNTimes = alloca %struct.__block_byref_weakLogNTimes, align 8 + %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 + %byref.isa = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 0 + store i8* null, i8** %byref.isa, align 8 + %byref.forwarding = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 1 + store %struct.__block_byref_weakLogNTimes* %weakLogNTimes, %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %byref.flags = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 2 + store i32 33554432, i32* %byref.flags, align 8 + %byref.size = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 3 + store i32 48, i32* %byref.size, align 4 + %tmp1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 4 + store i8* bitcast (void (i8*, i8*)* @__Block_byref_object_copy_ to i8*), i8** %tmp1, align 8 + %tmp2 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 5 + store i8* bitcast (void (i8*)* @__Block_byref_object_dispose_ to i8*), i8** %tmp2, align 8 + %weakLogNTimes1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 6 + %tmp3 = bitcast void (...)** %weakLogNTimes1 to i8** + %tmp4 = call i8* @objc_initWeak(i8** %tmp3, i8* null) nounwind + %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0 + store i8* null, i8** %block.isa, align 8 + %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1 + store i32 1107296256, i32* %block.flags, align 8 + %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2 + store i32 0, i32* %block.reserved, align 4 + %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3 + store i8* bitcast (void (i8*, i32)* @__main_block_invoke_0 to i8*), i8** %block.invoke, align 8 + %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4 + store %struct.__block_descriptor* null, %struct.__block_descriptor** %block.descriptor, align 8 + %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5 + %tmp5 = bitcast %struct.__block_byref_weakLogNTimes* %weakLogNTimes to i8* + store i8* %tmp5, i8** %block.captured, align 8 + %tmp6 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8* + %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0 + %tmp8 = load %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %weakLogNTimes3 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %tmp8, i64 0, i32 6 + 
%tmp9 = bitcast void (...)** %weakLogNTimes3 to i8** + %tmp10 = call i8* @not_really_objc_storeWeak(i8** %tmp9, i8* %tmp7) nounwind + %tmp11 = getelementptr inbounds i8* %tmp7, i64 16 + %tmp12 = bitcast i8* %tmp11 to i8** + %tmp13 = load i8** %tmp12, align 8 + %tmp14 = bitcast i8* %tmp13 to void (i8*, i32)* + call void %tmp14(i8* %tmp7, i32 10) nounwind, !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0 + call void @_Block_object_dispose(i8* %tmp5, i32 8) nounwind + call void @objc_destroyWeak(i8** %tmp3) nounwind + ret void +} + +declare void @__Block_byref_object_copy_(i8*, i8*) nounwind +declare void @__Block_byref_object_dispose_(i8*) nounwind +declare void @objc_destroyWeak(i8**) +declare i8* @objc_initWeak(i8**, i8*) +declare void @__main_block_invoke_0(i8* nocapture, i32) nounwind ssp +declare void @_Block_object_dispose(i8*, i32) +declare i8* @objc_retainBlock(i8*) +declare i8* @objc_storeWeak(i8**, i8*) +declare i8* @not_really_objc_storeWeak(i8**, i8*) +declare void @objc_release(i8*) + +!0 = metadata !{} diff --git a/test/Transforms/Reassociate/pr12245.ll b/test/Transforms/Reassociate/pr12245.ll new file mode 100644 index 0000000..84098bd --- /dev/null +++ b/test/Transforms/Reassociate/pr12245.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -basicaa -inline -instcombine -reassociate -dse -disable-output +; PR12245 + +@a = common global i32 0, align 4 +@d = common global i32 0, align 4 + +define i32 @fn2() nounwind uwtable ssp { +entry: + %0 = load i32* @a, align 4, !tbaa !0 + %dec = add nsw i32 %0, -1 + store i32 %dec, i32* @a, align 4, !tbaa !0 + %1 = load i32* @d, align 4, !tbaa !0 + %sub = sub nsw i32 %dec, %1 + store i32 %sub, i32* @d, align 4, !tbaa !0 + %2 = load i32* @a, align 4, !tbaa !0 + %dec1 = add nsw i32 %2, -1 + store i32 %dec1, i32* @a, align 4, !tbaa !0 + %3 = load i32* @d, align 4, !tbaa !0 + %sub2 = sub nsw i32 %dec1, %3 + store i32 %sub2, i32* @d, align 4, !tbaa !0 + %4 = load i32* @a, align 4, !tbaa !0 + %dec3 = add nsw i32 %4, -1 + store i32 %dec3, i32* @a, align 4, !tbaa !0 + %5 = load i32* @d, align 4, !tbaa !0 + %sub4 = sub nsw i32 %dec3, %5 + store i32 %sub4, i32* @d, align 4, !tbaa !0 + %6 = load i32* @a, align 4, !tbaa !0 + %dec5 = add nsw i32 %6, -1 + store i32 %dec5, i32* @a, align 4, !tbaa !0 + %7 = load i32* @d, align 4, !tbaa !0 + %sub6 = sub nsw i32 %dec5, %7 + store i32 %sub6, i32* @d, align 4, !tbaa !0 + %8 = load i32* @a, align 4, !tbaa !0 + %dec7 = add nsw i32 %8, -1 + store i32 %dec7, i32* @a, align 4, !tbaa !0 + %9 = load i32* @d, align 4, !tbaa !0 + %sub8 = sub nsw i32 %dec7, %9 + store i32 %sub8, i32* @d, align 4, !tbaa !0 + ret i32 0 +} + +define i32 @fn1() nounwind uwtable ssp { +entry: + %call = call i32 @fn2() + ret i32 %call +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll index 8780e32..03dcdf5 100644 --- a/test/Transforms/SimplifyLibCalls/floor.ll +++ b/test/Transforms/SimplifyLibCalls/floor.ll @@ -1,16 +1,31 @@ -; RUN: opt < %s -simplify-libcalls -S > %t -; RUN: not grep {call.*floor(} %t -; RUN: grep {call.*floorf(} %t -; RUN: not grep {call.*ceil(} %t -; RUN: grep {call.*ceilf(} %t -; RUN: not grep {call.*nearbyint(} %t -; RUN: grep {call.*nearbyintf(} %t -; XFAIL: sparc +; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s 
+; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s + +; DO-SIMPLIFY: call float @floorf( +; DO-SIMPLIFY: call float @ceilf( +; DO-SIMPLIFY: call float @roundf( +; DO-SIMPLIFY: call float @nearbyintf( + +; C89-SIMPLIFY: call float @floorf( +; C89-SIMPLIFY: call float @ceilf( +; C89-SIMPLIFY: call double @round( +; C89-SIMPLIFY: call double @nearbyint( + +; DONT-SIMPLIFY: call double @floor( +; DONT-SIMPLIFY: call double @ceil( +; DONT-SIMPLIFY: call double @round( +; DONT-SIMPLIFY: call double @nearbyint( declare double @floor(double) declare double @ceil(double) +declare double @round(double) + declare double @nearbyint(double) define float @test_floor(float %C) { @@ -29,8 +44,14 @@ define float @test_ceil(float %C) { ret float %F } -; PR8466 -; XFAIL: win32 +define float @test_round(float %C) { + %D = fpext float %C to double ; <double> [#uses=1] + ; --> roundf + %E = call double @round( double %D ) ; <double> [#uses=1] + %F = fptrunc double %E to float ; <float> [#uses=1] + ret float %F +} + define float @test_nearbyint(float %C) { %D = fpext float %C to double ; <double> [#uses=1] ; --> nearbyintf diff --git a/test/Transforms/SimplifyLibCalls/win-math.ll b/test/Transforms/SimplifyLibCalls/win-math.ll new file mode 100644 index 0000000..367e5b8 --- /dev/null +++ b/test/Transforms/SimplifyLibCalls/win-math.ll @@ -0,0 +1,275 @@ +; RUN: opt -O2 -S -mtriple=i386-pc-win32 < %s | FileCheck %s -check-prefix=WIN32 +; RUN: opt -O2 -S -mtriple=x86_64-pc-win32 < %s | FileCheck %s -check-prefix=WIN64 +; RUN: opt -O2 -S -mtriple=i386-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW32 +; RUN: opt -O2 -S -mtriple=x86_64-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW64 + +; x86 win32 msvcrt does not provide entry points for single-precision libm. +; x86-64 win32 msvcrt does (except for fabsf) +; msvcrt does not provide C99 math, but mingw32 does. 
+ +declare double @acos(double %x) +define float @float_acos(float %x) nounwind readnone { +; WIN32: @float_acos +; WIN32-NOT: float @acosf +; WIN32: double @acos + %1 = fpext float %x to double + %2 = call double @acos(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @asin(double %x) +define float @float_asin(float %x) nounwind readnone { +; WIN32: @float_asin +; WIN32-NOT: float @asinf +; WIN32: double @asin + %1 = fpext float %x to double + %2 = call double @asin(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @atan(double %x) +define float @float_atan(float %x) nounwind readnone { +; WIN32: @float_atan +; WIN32-NOT: float @atanf +; WIN32: double @atan + %1 = fpext float %x to double + %2 = call double @atan(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @atan2(double %x, double %y) +define float @float_atan2(float %x, float %y) nounwind readnone { +; WIN32: @float_atan2 +; WIN32-NOT: float @atan2f +; WIN32: double @atan2 + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @atan2(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @ceil(double %x) +define float @float_ceil(float %x) nounwind readnone { +; WIN32: @float_ceil +; WIN32-NOT: float @ceilf +; WIN32: double @ceil +; WIN64: @float_ceil +; WIN64: float @ceilf +; WIN64-NOT: double @ceil +; MINGW32: @float_ceil +; MINGW32: float @ceilf +; MINGW32-NOT: double @ceil +; MINGW64: @float_ceil +; MINGW64: float @ceilf +; MINGW64-NOT: double @ceil + %1 = fpext float %x to double + %2 = call double @ceil(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @_copysign(double %x) +define float @float_copysign(float %x) nounwind readnone { +; WIN32: @float_copysign +; WIN32-NOT: float @copysignf +; WIN32-NOT: float @_copysignf +; WIN32: double @_copysign + %1 = fpext float %x to double + %2 = call double @_copysign(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @cos(double %x) +define float @float_cos(float %x) nounwind readnone { +; WIN32: @float_cos +; WIN32-NOT: float @cosf +; WIN32: double @cos + %1 = fpext float %x to double + %2 = call double @cos(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @cosh(double %x) +define float @float_cosh(float %x) nounwind readnone { +; WIN32: @float_cosh +; WIN32-NOT: float @coshf +; WIN32: double @cosh + %1 = fpext float %x to double + %2 = call double @cosh(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @exp(double %x, double %y) +define float @float_exp(float %x, float %y) nounwind readnone { +; WIN32: @float_exp +; WIN32-NOT: float @expf +; WIN32: double @exp + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @exp(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @fabs(double %x, double %y) +define float @float_fabs(float %x, float %y) nounwind readnone { +; WIN32: @float_fabs +; WIN32-NOT: float @fabsf +; WIN32: double @fabs +; WIN64: @float_fabs +; WIN64-NOT: float @fabsf +; WIN64: double @fabs + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @fabs(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @floor(double %x) +define float @float_floor(float %x) nounwind readnone { +; WIN32: @float_floor +; WIN32-NOT: float @floorf +; WIN32: double @floor +; WIN64: 
@float_floor +; WIN64: float @floorf +; WIN64-NOT: double @floor +; MINGW32: @float_floor +; MINGW32: float @floorf +; MINGW32-NOT: double @floor +; MINGW64: @float_floor +; MINGW64: float @floorf +; MINGW64-NOT: double @floor + %1 = fpext float %x to double + %2 = call double @floor(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @fmod(double %x, double %y) +define float @float_fmod(float %x, float %y) nounwind readnone { +; WIN32: @float_fmod +; WIN32-NOT: float @fmodf +; WIN32: double @fmod + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @fmod(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @log(double %x) +define float @float_log(float %x) nounwind readnone { +; WIN32: @float_log +; WIN32-NOT: float @logf +; WIN32: double @log + %1 = fpext float %x to double + %2 = call double @log(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @pow(double %x, double %y) +define float @float_pow(float %x, float %y) nounwind readnone { +; WIN32: @float_pow +; WIN32-NOT: float @powf +; WIN32: double @pow + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @pow(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @sin(double %x) +define float @float_sin(float %x) nounwind readnone { +; WIN32: @float_sin +; WIN32-NOT: float @sinf +; WIN32: double @sin + %1 = fpext float %x to double + %2 = call double @sin(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @sinh(double %x) +define float @float_sinh(float %x) nounwind readnone { +; WIN32: @float_sinh +; WIN32-NOT: float @sinhf +; WIN32: double @sinh + %1 = fpext float %x to double + %2 = call double @sinh(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @sqrt(double %x) +define float @float_sqrt(float %x) nounwind readnone { +; WIN32: @float_sqrt +; WIN32-NOT: float @sqrtf +; WIN32: double @sqrt +; WIN64: @float_sqrt +; WIN64: float @sqrtf +; WIN64-NOT: double @sqrt +; MINGW32: @float_sqrt +; MINGW32: float @sqrtf +; MINGW32-NOT: double @sqrt +; MINGW64: @float_sqrt +; MINGW64: float @sqrtf +; MINGW64-NOT: double @sqrt + %1 = fpext float %x to double + %2 = call double @sqrt(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @tan(double %x) +define float @float_tan(float %x) nounwind readnone { +; WIN32: @float_tan +; WIN32-NOT: float @tanf +; WIN32: double @tan + %1 = fpext float %x to double + %2 = call double @tan(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @tanh(double %x) +define float @float_tanh(float %x) nounwind readnone { +; WIN32: @float_tanh +; WIN32-NOT: float @tanhf +; WIN32: double @tanh + %1 = fpext float %x to double + %2 = call double @tanh(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +; win32 does not have round; mingw32 does +declare double @round(double %x) +define float @float_round(float %x) nounwind readnone { +; WIN32: @float_round +; WIN32-NOT: float @roundf +; WIN32: double @round +; WIN64: @float_round +; WIN64-NOT: float @roundf +; WIN64: double @round +; MINGW32: @float_round +; MINGW32: float @roundf +; MINGW32-NOT: double @round +; MINGW64: @float_round +; MINGW64: float @roundf +; MINGW64-NOT: double @round + %1 = fpext float %x to double + %2 = call double @round(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + |