Diffstat (limited to 'test/CodeGen'): each entry lists the file mode, the file path, and the number of lines changed.
-rw-r--r--test/CodeGen/ARM/2006-11-10-CycleInDAG.ll2
-rw-r--r--test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-07-CombinerCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-13-InstrSched.ll4
-rw-r--r--test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-PEIBug.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-30-CombinerCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-07-jumptoentry.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-07-tailmerge-1.ll8
-rw-r--r--test/CodeGen/ARM/2007-05-09-tailmerge-2.ll8
-rw-r--r--test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-22-tailmerge-3.ll16
-rw-r--r--test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll2
-rw-r--r--test/CodeGen/ARM/2007-08-15-ReuseBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll2
-rw-r--r--test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-07-17-Fdiv.ll2
-rw-r--r--test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll2
-rw-r--r--test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-09-14-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-09-17-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-27-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-03-07-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-03-09-AddrModeBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-06-AsmModifier.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-AggregateAddr.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-FREM.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-FloatUndef.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll4
-rw-r--r--test/CodeGen/ARM/2009-06-02-ISelCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-22-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll2
-rw-r--r--test/CodeGen/ARM/2009-07-01-CommuteBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll7
-rw-r--r--test/CodeGen/ARM/2009-07-18-RewriterBug.ll1323
-rw-r--r--test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll94
-rw-r--r--test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll95
-rw-r--r--test/CodeGen/ARM/2009-07-29-VFP3Registers.ll108
-rw-r--r--test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll29
-rw-r--r--test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll33
-rw-r--r--test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll25
-rw-r--r--test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll42
-rw-r--r--test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll10
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill.ll40
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill2.ll38
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill3.ll31
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill4.ll26
-rw-r--r--test/CodeGen/ARM/2009-08-23-linkerprivate.ll8
-rw-r--r--test/CodeGen/ARM/2009-08-26-ScalarToVector.ll27
-rw-r--r--test/CodeGen/ARM/2009-08-27-ScalarToVector.ll35
-rw-r--r--test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll25
-rw-r--r--test/CodeGen/ARM/2009-08-29-TooLongSplat.ll23
-rw-r--r--test/CodeGen/ARM/2009-08-31-LSDA-Name.ll103
-rw-r--r--test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll9
-rw-r--r--test/CodeGen/ARM/2009-09-01-PostRAProlog.ll106
-rw-r--r--test/CodeGen/ARM/2009-09-09-AllOnes.ll10
-rw-r--r--test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll18
-rw-r--r--test/CodeGen/ARM/2009-09-10-postdec.ll11
-rw-r--r--test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll61
-rw-r--r--test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll41
-rw-r--r--test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll34
-rw-r--r--test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll14
-rw-r--r--test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll23
-rw-r--r--test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll21
-rw-r--r--test/CodeGen/ARM/2009-09-24-spill-align.ll17
-rw-r--r--test/CodeGen/ARM/2009-09-27-CoalescerBug.ll24
-rw-r--r--test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll19
-rw-r--r--test/CodeGen/ARM/addrmode.ll2
-rw-r--r--test/CodeGen/ARM/aliases.ll3
-rw-r--r--test/CodeGen/ARM/align.ll8
-rw-r--r--test/CodeGen/ARM/alloca.ll4
-rw-r--r--test/CodeGen/ARM/argaddr.ll2
-rw-r--r--test/CodeGen/ARM/arguments-nosplit-double.ll2
-rw-r--r--test/CodeGen/ARM/arguments-nosplit-i64.ll2
-rw-r--r--test/CodeGen/ARM/arguments.ll4
-rw-r--r--test/CodeGen/ARM/arguments2.ll4
-rw-r--r--test/CodeGen/ARM/arguments3.ll4
-rw-r--r--test/CodeGen/ARM/arguments4.ll4
-rw-r--r--test/CodeGen/ARM/arguments5.ll4
-rw-r--r--test/CodeGen/ARM/arguments6.ll4
-rw-r--r--test/CodeGen/ARM/arguments7.ll4
-rw-r--r--test/CodeGen/ARM/arguments8.ll4
-rw-r--r--test/CodeGen/ARM/arguments_f64_backfill.ll2
-rw-r--r--test/CodeGen/ARM/arm-asm.ll2
-rw-r--r--test/CodeGen/ARM/arm-frameaddr.ll4
-rw-r--r--test/CodeGen/ARM/arm-negative-stride.ll2
-rw-r--r--test/CodeGen/ARM/bfc.ll19
-rw-r--r--test/CodeGen/ARM/bic.ll2
-rw-r--r--test/CodeGen/ARM/bits.ll2
-rw-r--r--test/CodeGen/ARM/bx_fold.ll4
-rw-r--r--test/CodeGen/ARM/call.ll6
-rw-r--r--test/CodeGen/ARM/call_nolink.ll2
-rw-r--r--test/CodeGen/ARM/carry.ll6
-rw-r--r--test/CodeGen/ARM/clz.ll2
-rw-r--r--test/CodeGen/ARM/compare-call.ll2
-rw-r--r--test/CodeGen/ARM/constants.ll14
-rw-r--r--test/CodeGen/ARM/cse-libcalls.ll2
-rw-r--r--test/CodeGen/ARM/ctors_dtors.ll24
-rw-r--r--test/CodeGen/ARM/div.ll2
-rw-r--r--test/CodeGen/ARM/dyn-stackalloc.ll2
-rw-r--r--test/CodeGen/ARM/extloadi1.ll2
-rw-r--r--test/CodeGen/ARM/fabss.ll15
-rw-r--r--test/CodeGen/ARM/fadds.ll12
-rw-r--r--test/CodeGen/ARM/fcopysign.ll4
-rw-r--r--test/CodeGen/ARM/fdivs.ll12
-rw-r--r--test/CodeGen/ARM/fixunsdfdi.ll4
-rw-r--r--test/CodeGen/ARM/fmacs.ll13
-rw-r--r--test/CodeGen/ARM/fmdrr-fmrrd.ll4
-rw-r--r--test/CodeGen/ARM/fmscs.ll13
-rw-r--r--test/CodeGen/ARM/fmuls.ll12
-rw-r--r--test/CodeGen/ARM/fnegs.ll25
-rw-r--r--test/CodeGen/ARM/fnmacs.ll13
-rw-r--r--test/CodeGen/ARM/fnmscs.ll24
-rw-r--r--test/CodeGen/ARM/fnmul.ll4
-rw-r--r--test/CodeGen/ARM/fnmuls.ll23
-rw-r--r--test/CodeGen/ARM/formal.ll2
-rw-r--r--test/CodeGen/ARM/fp.ll38
-rw-r--r--test/CodeGen/ARM/fp_convert.ll49
-rw-r--r--test/CodeGen/ARM/fparith.ll34
-rw-r--r--test/CodeGen/ARM/fpcmp.ll30
-rw-r--r--test/CodeGen/ARM/fpcmp_ueq.ll4
-rw-r--r--test/CodeGen/ARM/fpconv.ll64
-rw-r--r--test/CodeGen/ARM/fpmem.ll6
-rw-r--r--test/CodeGen/ARM/fpow.ll2
-rw-r--r--test/CodeGen/ARM/fpowi.ll2
-rw-r--r--test/CodeGen/ARM/fptoint.ll4
-rw-r--r--test/CodeGen/ARM/fsubs.ll10
-rw-r--r--test/CodeGen/ARM/hardfloat_neon.ll13
-rw-r--r--test/CodeGen/ARM/hello.ll8
-rw-r--r--test/CodeGen/ARM/hidden-vis-2.ll5
-rw-r--r--test/CodeGen/ARM/hidden-vis-3.ll9
-rw-r--r--test/CodeGen/ARM/hidden-vis.ll19
-rw-r--r--test/CodeGen/ARM/iabs.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt1.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt2.ll8
-rw-r--r--test/CodeGen/ARM/ifcvt3.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt4.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll5
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll8
-rw-r--r--test/CodeGen/ARM/ifcvt7.ll11
-rw-r--r--test/CodeGen/ARM/ifcvt8.ll5
-rw-r--r--test/CodeGen/ARM/ifcvt9.ll2
-rw-r--r--test/CodeGen/ARM/illegal-vector-bitcast.ll3
-rw-r--r--test/CodeGen/ARM/imm.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm-imm-arm.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm2.ll2
-rw-r--r--test/CodeGen/ARM/insn-sched1.ll4
-rw-r--r--test/CodeGen/ARM/ispositive.ll2
-rw-r--r--test/CodeGen/ARM/large-stack.ll2
-rw-r--r--test/CodeGen/ARM/ldm.ll6
-rw-r--r--test/CodeGen/ARM/ldr.ll10
-rw-r--r--test/CodeGen/ARM/ldr_ext.ll31
-rw-r--r--test/CodeGen/ARM/ldr_frame.ll2
-rw-r--r--test/CodeGen/ARM/ldr_post.ll2
-rw-r--r--test/CodeGen/ARM/ldr_pre.ll2
-rw-r--r--test/CodeGen/ARM/ldrd.ll14
-rw-r--r--test/CodeGen/ARM/load-global.ll12
-rw-r--r--test/CodeGen/ARM/load.ll2
-rw-r--r--test/CodeGen/ARM/long-setcc.ll2
-rw-r--r--test/CodeGen/ARM/long.ll16
-rw-r--r--test/CodeGen/ARM/long_shift.ll2
-rw-r--r--test/CodeGen/ARM/lsr-code-insertion.ll4
-rw-r--r--test/CodeGen/ARM/lsr-scale-addr-mode.ll2
-rw-r--r--test/CodeGen/ARM/mem.ll4
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll8
-rw-r--r--test/CodeGen/ARM/memfunc.ll2
-rw-r--r--test/CodeGen/ARM/mls.ll14
-rw-r--r--test/CodeGen/ARM/mul.ll4
-rw-r--r--test/CodeGen/ARM/mul_const.ll17
-rw-r--r--test/CodeGen/ARM/mulhi.ll6
-rw-r--r--test/CodeGen/ARM/mvn.ll2
-rw-r--r--test/CodeGen/ARM/neon_arith1.ll2
-rw-r--r--test/CodeGen/ARM/neon_ld1.ll6
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll6
-rw-r--r--test/CodeGen/ARM/pack.ll4
-rw-r--r--test/CodeGen/ARM/pr3502.ll2
-rw-r--r--test/CodeGen/ARM/private.ll2
-rw-r--r--test/CodeGen/ARM/remat.ll4
-rw-r--r--test/CodeGen/ARM/ret0.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg1.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg3.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg4.ll2
-rw-r--r--test/CodeGen/ARM/ret_arg5.ll2
-rw-r--r--test/CodeGen/ARM/ret_f32_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_f32_arg5.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg_reg_split.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg_split.ll2
-rw-r--r--test/CodeGen/ARM/ret_f64_arg_stack.ll2
-rw-r--r--test/CodeGen/ARM/ret_i128_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg3.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg_split.ll2
-rw-r--r--test/CodeGen/ARM/ret_void.ll2
-rw-r--r--test/CodeGen/ARM/rev.ll4
-rw-r--r--test/CodeGen/ARM/sbfx.ll37
-rw-r--r--test/CodeGen/ARM/section.ll4
-rw-r--r--test/CodeGen/ARM/select.ll27
-rw-r--r--test/CodeGen/ARM/select_xform.ll2
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll4
-rw-r--r--test/CodeGen/ARM/smul.ll10
-rw-r--r--test/CodeGen/ARM/spill-q.ll57
-rw-r--r--test/CodeGen/ARM/stack-frame.ll4
-rw-r--r--test/CodeGen/ARM/stm.ll2
-rw-r--r--test/CodeGen/ARM/str_post.ll4
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll4
-rw-r--r--test/CodeGen/ARM/str_pre.ll2
-rw-r--r--test/CodeGen/ARM/str_trunc.ll4
-rw-r--r--test/CodeGen/ARM/sxt_rot.ll6
-rw-r--r--test/CodeGen/ARM/t2-imm.ll9
-rw-r--r--test/CodeGen/ARM/thread_pointer.ll2
-rw-r--r--test/CodeGen/ARM/tls1.ll6
-rw-r--r--test/CodeGen/ARM/tls2.ll6
-rw-r--r--test/CodeGen/ARM/tls3.ll2
-rw-r--r--test/CodeGen/ARM/trunc_ldr.ll4
-rw-r--r--test/CodeGen/ARM/truncstore-dag-combine.ll4
-rw-r--r--test/CodeGen/ARM/tst_teq.ll4
-rw-r--r--test/CodeGen/ARM/uint64tof64.ll2
-rw-r--r--test/CodeGen/ARM/unaligned_load_store.ll39
-rw-r--r--test/CodeGen/ARM/unord.ll4
-rw-r--r--test/CodeGen/ARM/uxt_rot.ll6
-rw-r--r--test/CodeGen/ARM/uxtb.ll2
-rw-r--r--test/CodeGen/ARM/vaba.ll100
-rw-r--r--test/CodeGen/ARM/vabd.ll107
-rw-r--r--test/CodeGen/ARM/vabs.ll85
-rw-r--r--test/CodeGen/ARM/vadd.ll213
-rw-r--r--test/CodeGen/ARM/vargs.ll2
-rw-r--r--test/CodeGen/ARM/vargs_align.ll10
-rw-r--r--test/CodeGen/ARM/vbits.ll507
-rw-r--r--test/CodeGen/ARM/vbsl.ll20
-rw-r--r--test/CodeGen/ARM/vceq.ll62
-rw-r--r--test/CodeGen/ARM/vcge.ll126
-rw-r--r--test/CodeGen/ARM/vcgt.ll126
-rw-r--r--test/CodeGen/ARM/vcnt.ll119
-rw-r--r--test/CodeGen/ARM/vcombine.ll36
-rw-r--r--test/CodeGen/ARM/vcvt.ll97
-rw-r--r--test/CodeGen/ARM/vdup.ll143
-rw-r--r--test/CodeGen/ARM/vext.ll56
-rw-r--r--test/CodeGen/ARM/vfcmp.ll101
-rw-r--r--test/CodeGen/ARM/vfp.ll43
-rw-r--r--test/CodeGen/ARM/vget_lane.ll146
-rw-r--r--test/CodeGen/ARM/vhadd.ll156
-rw-r--r--test/CodeGen/ARM/vhsub.ll32
-rw-r--r--test/CodeGen/ARM/vicmp.ll88
-rw-r--r--test/CodeGen/ARM/vld1.ll83
-rw-r--r--test/CodeGen/ARM/vld2.ll113
-rw-r--r--test/CodeGen/ARM/vld3.ll117
-rw-r--r--test/CodeGen/ARM/vld4.ll117
-rw-r--r--test/CodeGen/ARM/vldlane.ll328
-rw-r--r--test/CodeGen/ARM/vminmax.ll293
-rw-r--r--test/CodeGen/ARM/vmla.ll126
-rw-r--r--test/CodeGen/ARM/vmls.ll126
-rw-r--r--test/CodeGen/ARM/vmov.ll214
-rw-r--r--test/CodeGen/ARM/vmul.ll190
-rw-r--r--test/CodeGen/ARM/vneg.ll78
-rw-r--r--test/CodeGen/ARM/vpadal.ll32
-rw-r--r--test/CodeGen/ARM/vpadd.ll142
-rw-r--r--test/CodeGen/ARM/vpminmax.ll147
-rw-r--r--test/CodeGen/ARM/vqadd.ll42
-rw-r--r--test/CodeGen/ARM/vqdmul.ll281
-rw-r--r--test/CodeGen/ARM/vqshl.ll266
-rw-r--r--test/CodeGen/ARM/vqshrn.ll113
-rw-r--r--test/CodeGen/ARM/vqsub.ll42
-rw-r--r--test/CodeGen/ARM/vrec.ll119
-rw-r--r--test/CodeGen/ARM/vrev.ll113
-rw-r--r--test/CodeGen/ARM/vshift.ll145
-rw-r--r--test/CodeGen/ARM/vshiftins.ll42
-rw-r--r--test/CodeGen/ARM/vshl.ll394
-rw-r--r--test/CodeGen/ARM/vshll.ll29
-rw-r--r--test/CodeGen/ARM/vshrn.ll39
-rw-r--r--test/CodeGen/ARM/vsra.ll82
-rw-r--r--test/CodeGen/ARM/vst1.ll93
-rw-r--r--test/CodeGen/ARM/vst2.ll84
-rw-r--r--test/CodeGen/ARM/vst3.ll88
-rw-r--r--test/CodeGen/ARM/vst4.ll88
-rw-r--r--test/CodeGen/ARM/vstlane.ll197
-rw-r--r--test/CodeGen/ARM/vsub.ll213
-rw-r--r--test/CodeGen/ARM/vtbl.ll109
-rw-r--r--test/CodeGen/ARM/vtrn.ll97
-rw-r--r--test/CodeGen/ARM/vuzp.ll75
-rw-r--r--test/CodeGen/ARM/vzip.ll75
-rw-r--r--test/CodeGen/ARM/weak.ll4
-rw-r--r--test/CodeGen/ARM/weak2.ll2
-rw-r--r--test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll2
-rw-r--r--test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll2
-rw-r--r--test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll2
-rw-r--r--test/CodeGen/Alpha/2006-01-26-VaargBreak.ll2
-rw-r--r--test/CodeGen/Alpha/2006-04-04-zextload.ll2
-rw-r--r--test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll2
-rw-r--r--test/CodeGen/Alpha/2006-11-01-vastart.ll2
-rw-r--r--test/CodeGen/Alpha/2007-11-27-mulneg3.ll2
-rw-r--r--test/CodeGen/Alpha/2008-11-10-smul_lohi.ll2
-rw-r--r--test/CodeGen/Alpha/2008-11-12-Add128.ll2
-rw-r--r--test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll6
-rw-r--r--test/CodeGen/Alpha/add.ll2
-rw-r--r--test/CodeGen/Alpha/add128.ll2
-rw-r--r--test/CodeGen/Alpha/bic.ll2
-rw-r--r--test/CodeGen/Alpha/bsr.ll2
-rw-r--r--test/CodeGen/Alpha/call_adj.ll2
-rw-r--r--test/CodeGen/Alpha/cmov.ll4
-rw-r--r--test/CodeGen/Alpha/cmpbge.ll2
-rw-r--r--test/CodeGen/Alpha/ctlz.ll8
-rw-r--r--test/CodeGen/Alpha/ctlz_e.ll2
-rw-r--r--test/CodeGen/Alpha/ctpop.ll8
-rw-r--r--test/CodeGen/Alpha/eqv.ll2
-rw-r--r--test/CodeGen/Alpha/i32_sub_1.ll2
-rw-r--r--test/CodeGen/Alpha/illegal-element-type.ll2
-rw-r--r--test/CodeGen/Alpha/jmp_table.ll8
-rw-r--r--test/CodeGen/Alpha/mb.ll2
-rw-r--r--test/CodeGen/Alpha/mul128.ll2
-rw-r--r--test/CodeGen/Alpha/mul5.ll2
-rw-r--r--test/CodeGen/Alpha/neg1.ll2
-rw-r--r--test/CodeGen/Alpha/not.ll2
-rw-r--r--test/CodeGen/Alpha/ornot.ll2
-rw-r--r--test/CodeGen/Alpha/private.ll2
-rw-r--r--test/CodeGen/Alpha/rpcc.ll2
-rw-r--r--test/CodeGen/Alpha/srl_and.ll2
-rw-r--r--test/CodeGen/Alpha/sub128.ll2
-rw-r--r--test/CodeGen/Alpha/weak.ll4
-rw-r--r--test/CodeGen/Alpha/wmb.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot2.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot3.ll2
-rw-r--r--test/CodeGen/Alpha/zapnot4.ll2
-rw-r--r--test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll15
-rw-r--r--test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll17
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll19
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-MissingDead.ll25
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll17
-rw-r--r--test/CodeGen/Blackfin/add-overflow.ll18
-rw-r--r--test/CodeGen/Blackfin/add.ll5
-rw-r--r--test/CodeGen/Blackfin/addsub-i128.ll42
-rw-r--r--test/CodeGen/Blackfin/basic-i1.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i16.ll36
-rw-r--r--test/CodeGen/Blackfin/basic-i32.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i64.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i8.ll51
-rw-r--r--test/CodeGen/Blackfin/basictest.ll19
-rw-r--r--test/CodeGen/Blackfin/burg.ll19
-rw-r--r--test/CodeGen/Blackfin/cmp-small-imm.ll6
-rw-r--r--test/CodeGen/Blackfin/cmp64.ll17
-rw-r--r--test/CodeGen/Blackfin/ct32.ll20
-rw-r--r--test/CodeGen/Blackfin/ct64.ll20
-rw-r--r--test/CodeGen/Blackfin/ctlz16.ll18
-rw-r--r--test/CodeGen/Blackfin/ctlz64.ll15
-rw-r--r--test/CodeGen/Blackfin/ctpop16.ll18
-rw-r--r--test/CodeGen/Blackfin/cttz16.ll18
-rw-r--r--test/CodeGen/Blackfin/cycles.ll17
-rw-r--r--test/CodeGen/Blackfin/dg.exp5
-rw-r--r--test/CodeGen/Blackfin/double-cast.ll8
-rw-r--r--test/CodeGen/Blackfin/frameindex.ll10
-rw-r--r--test/CodeGen/Blackfin/i17mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i1mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i1ops.ll10
-rw-r--r--test/CodeGen/Blackfin/i216mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i248mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i256mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i256param.ll7
-rw-r--r--test/CodeGen/Blackfin/i56param.ll8
-rw-r--r--test/CodeGen/Blackfin/i8mem.ll10
-rw-r--r--test/CodeGen/Blackfin/inline-asm.ll38
-rw-r--r--test/CodeGen/Blackfin/int-setcc.ll80
-rw-r--r--test/CodeGen/Blackfin/invalid-apint.ll15
-rw-r--r--test/CodeGen/Blackfin/jumptable.ll53
-rw-r--r--test/CodeGen/Blackfin/large-switch.ll187
-rw-r--r--test/CodeGen/Blackfin/load-i16.ll13
-rw-r--r--test/CodeGen/Blackfin/logic-i16.ll16
-rw-r--r--test/CodeGen/Blackfin/many-args.ll23
-rw-r--r--test/CodeGen/Blackfin/mulhu.ll106
-rw-r--r--test/CodeGen/Blackfin/printf.ll10
-rw-r--r--test/CodeGen/Blackfin/printf2.ll8
-rw-r--r--test/CodeGen/Blackfin/promote-logic.ll42
-rw-r--r--test/CodeGen/Blackfin/promote-setcc.ll37
-rw-r--r--test/CodeGen/Blackfin/sdiv.ll5
-rw-r--r--test/CodeGen/Blackfin/simple-select.ll11
-rw-r--r--test/CodeGen/Blackfin/switch.ll18
-rw-r--r--test/CodeGen/Blackfin/switch2.ll16
-rw-r--r--test/CodeGen/Blackfin/sync-intr.ll13
-rw-r--r--test/CodeGen/CBackend/2002-05-16-NameCollide.ll2
-rw-r--r--test/CodeGen/CBackend/2002-05-21-MissingReturn.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-DataPointer.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll2
-rw-r--r--test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll2
-rw-r--r--test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll2
-rw-r--r--test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll2
-rw-r--r--test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll2
-rw-r--r--test/CodeGen/CBackend/2002-10-16-External.ll2
-rw-r--r--test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll2
-rw-r--r--test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll2
-rw-r--r--test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll2
-rw-r--r--test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll2
-rw-r--r--test/CodeGen/CBackend/2003-05-31-MissingStructName.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-01-NullPointerType.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-11-HexConstant.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll2
-rw-r--r--test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll2
-rw-r--r--test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll2
-rw-r--r--test/CodeGen/CBackend/2003-10-23-UnusedType.ll2
-rw-r--r--test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll2
-rw-r--r--test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll2
-rw-r--r--test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll2
-rw-r--r--test/CodeGen/CBackend/2004-08-09-va-end-null.ll2
-rw-r--r--test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll2
-rw-r--r--test/CodeGen/CBackend/2004-12-03-ExternStatics.ll2
-rw-r--r--test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll2
-rw-r--r--test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll2
-rw-r--r--test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll2
-rw-r--r--test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll2
-rw-r--r--test/CodeGen/CBackend/2005-08-23-Fmod.ll2
-rw-r--r--test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll2
-rw-r--r--test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll2
-rw-r--r--test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll2
-rw-r--r--test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll2
-rw-r--r--test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll4
-rw-r--r--test/CodeGen/CBackend/2007-02-05-memset.ll2
-rw-r--r--test/CodeGen/CBackend/2007-02-23-NameConflicts.ll6
-rw-r--r--test/CodeGen/CBackend/2007-07-11-PackedStruct.ll2
-rw-r--r--test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll2
-rw-r--r--test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll2
-rw-r--r--test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll2
-rw-r--r--test/CodeGen/CBackend/2008-06-04-IndirectMem.ll2
-rw-r--r--test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll2
-rw-r--r--test/CodeGen/CBackend/fneg.ll2
-rw-r--r--test/CodeGen/CBackend/pr2408.ll2
-rw-r--r--test/CodeGen/CBackend/vectors.ll2
-rw-r--r--test/CodeGen/CPP/2007-06-16-Funcname.ll2
-rw-r--r--test/CodeGen/CPP/2009-05-01-Long-Double.ll2
-rw-r--r--test/CodeGen/CPP/2009-05-04-CondBr.ll2
-rw-r--r--test/CodeGen/CPP/llvm2cpp.ll2
-rw-r--r--test/CodeGen/CellSPU/2009-01-01-BrCond.ll2
-rw-r--r--test/CodeGen/CellSPU/and_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/call.ll2
-rw-r--r--test/CodeGen/CellSPU/call_indirect.ll4
-rw-r--r--test/CodeGen/CellSPU/ctpop.ll2
-rw-r--r--test/CodeGen/CellSPU/dp_farith.ll2
-rw-r--r--test/CodeGen/CellSPU/eqv.ll2
-rw-r--r--test/CodeGen/CellSPU/extract_elt.ll2
-rw-r--r--test/CodeGen/CellSPU/fcmp32.ll2
-rw-r--r--test/CodeGen/CellSPU/fcmp64.ll2
-rw-r--r--test/CodeGen/CellSPU/fdiv.ll2
-rw-r--r--test/CodeGen/CellSPU/fneg-fabs.ll2
-rw-r--r--test/CodeGen/CellSPU/i64ops.ll2
-rw-r--r--test/CodeGen/CellSPU/i8ops.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp16.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp32.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp64.ll2
-rw-r--r--test/CodeGen/CellSPU/icmp8.ll2
-rw-r--r--test/CodeGen/CellSPU/immed16.ll2
-rw-r--r--test/CodeGen/CellSPU/immed32.ll2
-rw-r--r--test/CodeGen/CellSPU/immed64.ll2
-rw-r--r--test/CodeGen/CellSPU/int2fp.ll2
-rw-r--r--test/CodeGen/CellSPU/intrinsics_branch.ll2
-rw-r--r--test/CodeGen/CellSPU/intrinsics_float.ll2
-rw-r--r--test/CodeGen/CellSPU/intrinsics_logical.ll2
-rw-r--r--test/CodeGen/CellSPU/loads.ll10
-rw-r--r--test/CodeGen/CellSPU/mul-with-overflow.ll2
-rw-r--r--test/CodeGen/CellSPU/mul_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/nand.ll2
-rw-r--r--test/CodeGen/CellSPU/or_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/private.ll2
-rw-r--r--test/CodeGen/CellSPU/rotate_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/select_bits.ll2
-rw-r--r--test/CodeGen/CellSPU/sext128.ll47
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/sp_farith.ll2
-rw-r--r--test/CodeGen/CellSPU/stores.ll2
-rw-r--r--test/CodeGen/CellSPU/struct_1.ll4
-rw-r--r--test/CodeGen/CellSPU/trunc.ll2
-rw-r--r--test/CodeGen/CellSPU/vec_const.ll4
-rw-r--r--test/CodeGen/CellSPU/vecinsert.ll2
-rw-r--r--test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll2
-rw-r--r--test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-27-phifcmpd.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-27-usefsubasbool.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-28-ManyArgs.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll2
-rw-r--r--test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-06-BadIntCmp.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-07-BadLongConst.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-08-BadCastToBool.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll2
-rw-r--r--test/CodeGen/Generic/2004-02-08-UnwindSupport.ll2
-rw-r--r--test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll2
-rw-r--r--test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll2
-rw-r--r--test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll2
-rw-r--r--test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll2
-rw-r--r--test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll2
-rw-r--r--test/CodeGen/Generic/2005-10-21-longlonggtu.ll2
-rw-r--r--test/CodeGen/Generic/2005-12-01-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll2
-rw-r--r--test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll2
-rw-r--r--test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll2
-rw-r--r--test/CodeGen/Generic/2006-02-12-InsertLibcall.ll2
-rw-r--r--test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll2
-rw-r--r--test/CodeGen/Generic/2006-04-11-vecload.ll2
-rw-r--r--test/CodeGen/Generic/2006-04-26-SetCCAnd.ll2
-rw-r--r--test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll2
-rw-r--r--test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-07-03-schedulers.ll6
-rw-r--r--test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-09-06-SwitchLowering.ll2
-rw-r--r--test/CodeGen/Generic/2006-10-27-CondFolding.ll2
-rw-r--r--test/CodeGen/Generic/2006-10-29-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll2
-rw-r--r--test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll2
-rw-r--r--test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll2
-rw-r--r--test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll2
-rw-r--r--test/CodeGen/Generic/2007-02-16-BranchFold.ll2
-rw-r--r--test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll2
-rw-r--r--test/CodeGen/Generic/2007-02-25-invoke.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll4
-rw-r--r--test/CodeGen/Generic/2007-04-17-lsr-crash.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll4
-rw-r--r--test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-27-LargeMemObject.ll2
-rw-r--r--test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll2
-rw-r--r--test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll2
-rw-r--r--test/CodeGen/Generic/2007-05-05-Personality.ll2
-rw-r--r--test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll2
-rw-r--r--test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll2
-rw-r--r--test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll2
-rw-r--r--test/CodeGen/Generic/2007-12-17-InvokeAsm.ll2
-rw-r--r--test/CodeGen/Generic/2007-12-31-UnusedSelector.ll2
-rw-r--r--test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll2
-rw-r--r--test/CodeGen/Generic/2008-01-30-LoadCrash.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-04-Ctlz.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-20-MatchingMem.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-25-NegateZero.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-26-NegatableCrash.ll2
-rw-r--r--test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll2
-rw-r--r--test/CodeGen/Generic/2009-03-17-LSR-APInt.ll2
-rw-r--r--test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll2
-rw-r--r--test/CodeGen/Generic/2009-04-10-SinkCrash.ll2
-rw-r--r--test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll2
-rw-r--r--test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll2
-rw-r--r--test/CodeGen/Generic/APIntLoadStore.ll2
-rw-r--r--test/CodeGen/Generic/APIntParam.ll2
-rw-r--r--test/CodeGen/Generic/APIntSextParam.ll2
-rw-r--r--test/CodeGen/Generic/APIntZextParam.ll2
-rw-r--r--test/CodeGen/Generic/BasicInstrs.ll2
-rw-r--r--test/CodeGen/Generic/BurgBadRegAlloc.ll2
-rw-r--r--test/CodeGen/Generic/ConstantExprLowering.ll2
-rw-r--r--test/CodeGen/Generic/GC/alloc_loop.ll2
-rw-r--r--test/CodeGen/Generic/GC/argpromotion.ll2
-rw-r--r--test/CodeGen/Generic/GC/deadargelim.ll2
-rw-r--r--test/CodeGen/Generic/GC/frame_size.ll2
-rw-r--r--test/CodeGen/Generic/GC/inline.ll2
-rw-r--r--test/CodeGen/Generic/GC/inline2.ll4
-rw-r--r--test/CodeGen/Generic/GC/lower_gcroot.ll2
-rw-r--r--test/CodeGen/Generic/GC/redundant_init.ll2
-rw-r--r--test/CodeGen/Generic/GC/simple_ocaml.ll4
-rw-r--r--test/CodeGen/Generic/Makefile4
-rw-r--r--test/CodeGen/Generic/SwitchLowering.ll2
-rw-r--r--test/CodeGen/Generic/add-with-overflow-24.ll2
-rw-r--r--test/CodeGen/Generic/add-with-overflow.ll4
-rw-r--r--test/CodeGen/Generic/addc-fold2.ll4
-rw-r--r--test/CodeGen/Generic/asm-large-immediate.ll2
-rw-r--r--test/CodeGen/Generic/badCallArgLRLLVM.ll2
-rw-r--r--test/CodeGen/Generic/badFoldGEP.ll2
-rw-r--r--test/CodeGen/Generic/badarg6.ll2
-rw-r--r--test/CodeGen/Generic/badlive.ll2
-rw-r--r--test/CodeGen/Generic/bool-to-double.ll2
-rw-r--r--test/CodeGen/Generic/bool-vector.ll2
-rw-r--r--test/CodeGen/Generic/call-ret0.ll2
-rw-r--r--test/CodeGen/Generic/call-ret42.ll2
-rw-r--r--test/CodeGen/Generic/call-void.ll2
-rw-r--r--test/CodeGen/Generic/call2-ret0.ll2
-rw-r--r--test/CodeGen/Generic/cast-fp.ll2
-rw-r--r--test/CodeGen/Generic/constindices.ll2
-rw-r--r--test/CodeGen/Generic/debug-info.ll2
-rw-r--r--test/CodeGen/Generic/div-neg-power-2.ll2
-rw-r--r--test/CodeGen/Generic/empty-load-store.ll2
-rw-r--r--test/CodeGen/Generic/externally_available.ll2
-rw-r--r--test/CodeGen/Generic/fastcall.ll2
-rw-r--r--test/CodeGen/Generic/fneg-fabs.ll2
-rw-r--r--test/CodeGen/Generic/fp-to-int-invalid.ll2
-rw-r--r--test/CodeGen/Generic/fp_to_int.ll2
-rw-r--r--test/CodeGen/Generic/fpowi-promote.ll4
-rw-r--r--test/CodeGen/Generic/fwdtwice.ll2
-rw-r--r--test/CodeGen/Generic/getresult-undef.ll2
-rw-r--r--test/CodeGen/Generic/global-ret0.ll2
-rw-r--r--test/CodeGen/Generic/hello.ll2
-rw-r--r--test/CodeGen/Generic/i128-addsub.ll2
-rw-r--r--test/CodeGen/Generic/i128-arith.ll2
-rw-r--r--test/CodeGen/Generic/inline-asm-special-strings.ll2
-rw-r--r--test/CodeGen/Generic/intrinsics.ll2
-rw-r--r--test/CodeGen/Generic/invalid-memcpy.ll2
-rw-r--r--test/CodeGen/Generic/isunord.ll3
-rw-r--r--test/CodeGen/Generic/llvm-ct-intrinsics.ll2
-rw-r--r--test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll2
-rw-r--r--test/CodeGen/Generic/negintconst.ll2
-rw-r--r--test/CodeGen/Generic/nested-select.ll2
-rw-r--r--test/CodeGen/Generic/phi-immediate-factoring.ll2
-rw-r--r--test/CodeGen/Generic/pr2625.ll2
-rw-r--r--test/CodeGen/Generic/pr3288.ll2
-rw-r--r--test/CodeGen/Generic/print-add.ll2
-rw-r--r--test/CodeGen/Generic/print-arith-fp.ll2
-rw-r--r--test/CodeGen/Generic/print-arith-int.ll2
-rw-r--r--test/CodeGen/Generic/print-int.ll2
-rw-r--r--test/CodeGen/Generic/print-mul-exp.ll2
-rw-r--r--test/CodeGen/Generic/print-mul.ll2
-rw-r--r--test/CodeGen/Generic/print-shift.ll2
-rw-r--r--test/CodeGen/Generic/ret0.ll2
-rw-r--r--test/CodeGen/Generic/ret42.ll2
-rw-r--r--test/CodeGen/Generic/select-cc.ll2
-rw-r--r--test/CodeGen/Generic/select.ll2
-rw-r--r--test/CodeGen/Generic/shift-int64.ll2
-rw-r--r--test/CodeGen/Generic/spillccr.ll2
-rw-r--r--test/CodeGen/Generic/stack-protector.ll4
-rw-r--r--test/CodeGen/Generic/stacksave-restore.ll2
-rw-r--r--test/CodeGen/Generic/storetrunc-fp.ll2
-rw-r--r--test/CodeGen/Generic/switch-crit-edge-constant.ll2
-rw-r--r--test/CodeGen/Generic/switch-lower-feature-2.ll2
-rw-r--r--test/CodeGen/Generic/switch-lower-feature.ll10
-rw-r--r--test/CodeGen/Generic/switch-lower.ll2
-rw-r--r--test/CodeGen/Generic/trap.ll2
-rw-r--r--test/CodeGen/Generic/v-split.ll2
-rw-r--r--test/CodeGen/Generic/vector-casts.ll2
-rw-r--r--test/CodeGen/Generic/vector-constantexpr.ll2
-rw-r--r--test/CodeGen/Generic/vector-identity-shuffle.ll2
-rw-r--r--test/CodeGen/Generic/vector.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Rot.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Shift.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll2
-rw-r--r--test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll30
-rw-r--r--test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll22
-rw-r--r--test/CodeGen/MSP430/2009-10-10-OrImpDef.ll14
-rw-r--r--test/CodeGen/MSP430/Inst16mi.ll48
-rw-r--r--test/CodeGen/MSP430/Inst16mm.ll54
-rw-r--r--test/CodeGen/MSP430/Inst16mr.ll48
-rw-r--r--test/CodeGen/MSP430/Inst16rm.ll38
-rw-r--r--test/CodeGen/MSP430/Inst16rr.ll37
-rw-r--r--test/CodeGen/MSP430/Inst8mi.ll48
-rw-r--r--test/CodeGen/MSP430/Inst8mm.ll55
-rw-r--r--test/CodeGen/MSP430/Inst8mr.ll48
-rw-r--r--test/CodeGen/MSP430/Inst8rm.ll38
-rw-r--r--test/CodeGen/MSP430/Inst8rr.ll38
-rw-r--r--test/CodeGen/MSP430/inline-asm.ll25
-rw-r--r--test/CodeGen/Mips/2008-06-05-Carry.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-03-SRet.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-05-ByVal.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-06-fadd64.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-FPExtend.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-Float2Int.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-15-InternalConstant.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-15-SmallSection.ll4
-rw-r--r--test/CodeGen/Mips/2008-07-16-SignExtInReg.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-22-Cstpool.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-23-fpcmp.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-29-icmp.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-31-fcopysign.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-01-AsmInline.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-03-ReturnDouble.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-03-fabs64.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-04-Bitconvert.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-06-Alloca.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-07-CC.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-07-FPRound.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-08-bswap.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-08-ctlz.ll2
-rw-r--r--test/CodeGen/Mips/2008-10-13-LegalizerBug.ll2
-rw-r--r--test/CodeGen/Mips/2008-11-10-xint_to_fp.ll2
-rw-r--r--test/CodeGen/Mips/private.ll2
-rw-r--r--test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll29
-rw-r--r--test/CodeGen/PIC16/dg.exp5
-rw-r--r--test/CodeGen/PIC16/global-in-user-section.ll5
-rw-r--r--test/CodeGen/PIC16/globals.ll15
-rw-r--r--test/CodeGen/PIC16/sext.ll10
-rw-r--r--test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2004-11-30-shift-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-01-14-UndefLong.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-04-05-splat-ish.ll3
-rw-r--r--test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll4
-rw-r--r--test/CodeGen/PowerPC/2006-08-11-RetVector.ll4
-rw-r--r--test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-09-28-shift_64.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll3
-rw-r--r--test/CodeGen/PowerPC/2006-10-13-Miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll6
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll6
-rw-r--r--test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-08-unaligned.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll6
-rw-r--r--test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-06-KillInfo.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll4
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-Bswap.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-Fabs.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-17-Fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-28-f128-i32.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-12-12-EH.ll4
-rw-r--r--test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-03-17-LSRBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll16
-rw-r--r--test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll25
-rw-r--r--test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll8
-rw-r--r--test/CodeGen/PowerPC/2009-09-18-carrybit.ll62
-rw-r--r--test/CodeGen/PowerPC/Atomics-32.ll2
-rw-r--r--test/CodeGen/PowerPC/Atomics-64.ll2
-rw-r--r--test/CodeGen/PowerPC/Frames-alloca.ll57
-rw-r--r--test/CodeGen/PowerPC/Frames-large.ll119
-rw-r--r--test/CodeGen/PowerPC/Frames-leaf.ll32
-rw-r--r--test/CodeGen/PowerPC/Frames-small.ll18
-rw-r--r--test/CodeGen/PowerPC/LargeAbsoluteAddr.ll6
-rw-r--r--test/CodeGen/PowerPC/addc.ll2
-rw-r--r--test/CodeGen/PowerPC/addi-reassoc.ll2
-rw-r--r--test/CodeGen/PowerPC/align.ll6
-rw-r--r--test/CodeGen/PowerPC/and-branch.ll2
-rw-r--r--test/CodeGen/PowerPC/and-elim.ll2
-rw-r--r--test/CodeGen/PowerPC/and-imm.ll2
-rw-r--r--test/CodeGen/PowerPC/and_add.ll2
-rw-r--r--test/CodeGen/PowerPC/and_sext.ll4
-rw-r--r--test/CodeGen/PowerPC/and_sra.ll2
-rw-r--r--test/CodeGen/PowerPC/atomic-1.ll4
-rw-r--r--test/CodeGen/PowerPC/atomic-2.ll4
-rw-r--r--test/CodeGen/PowerPC/available-externally.ll116
-rw-r--r--test/CodeGen/PowerPC/big-endian-actual-args.ll4
-rw-r--r--test/CodeGen/PowerPC/big-endian-call-result.ll4
-rw-r--r--test/CodeGen/PowerPC/big-endian-formal-args.ll8
-rw-r--r--test/CodeGen/PowerPC/branch-opt.ll2
-rw-r--r--test/CodeGen/PowerPC/bswap-load-store.ll12
-rw-r--r--test/CodeGen/PowerPC/buildvec_canonicalize.ll6
-rw-r--r--test/CodeGen/PowerPC/calls.ll6
-rw-r--r--test/CodeGen/PowerPC/cmp-cmp.ll2
-rw-r--r--test/CodeGen/PowerPC/compare-duplicate.ll2
-rw-r--r--test/CodeGen/PowerPC/compare-simm.ll2
-rw-r--r--test/CodeGen/PowerPC/constants.ll6
-rw-r--r--test/CodeGen/PowerPC/cr_spilling.ll2
-rw-r--r--test/CodeGen/PowerPC/cttz.ll2
-rw-r--r--test/CodeGen/PowerPC/darwin-labels.ll2
-rw-r--r--test/CodeGen/PowerPC/delete-node.ll2
-rw-r--r--test/CodeGen/PowerPC/div-2.ll4
-rw-r--r--test/CodeGen/PowerPC/eqv-andc-orc-nor.ll10
-rw-r--r--test/CodeGen/PowerPC/extsh.ll2
-rw-r--r--test/CodeGen/PowerPC/fabs.ll2
-rw-r--r--test/CodeGen/PowerPC/fma.ll2
-rw-r--r--test/CodeGen/PowerPC/fnabs.ll2
-rw-r--r--test/CodeGen/PowerPC/fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/fold-li.ll2
-rw-r--r--test/CodeGen/PowerPC/fp-branch.ll2
-rw-r--r--test/CodeGen/PowerPC/fp-int-fp.ll2
-rw-r--r--test/CodeGen/PowerPC/fp_to_uint.ll2
-rw-r--r--test/CodeGen/PowerPC/fpcopy.ll2
-rw-r--r--test/CodeGen/PowerPC/frounds.ll2
-rw-r--r--test/CodeGen/PowerPC/fsqrt.ll14
-rw-r--r--test/CodeGen/PowerPC/hello.ll4
-rw-r--r--test/CodeGen/PowerPC/hidden-vis-2.ll2
-rw-r--r--test/CodeGen/PowerPC/hidden-vis.ll2
-rw-r--r--test/CodeGen/PowerPC/i128-and-beyond.ll2
-rw-r--r--test/CodeGen/PowerPC/i64_fp.ll16
-rw-r--r--test/CodeGen/PowerPC/iabs.ll2
-rw-r--r--test/CodeGen/PowerPC/illegal-element-type.ll2
-rw-r--r--test/CodeGen/PowerPC/inlineasm-copy.ll2
-rw-r--r--test/CodeGen/PowerPC/int-fp-conv-0.ll2
-rw-r--r--test/CodeGen/PowerPC/int-fp-conv-1.ll2
-rw-r--r--test/CodeGen/PowerPC/invalid-memcpy.ll4
-rw-r--r--test/CodeGen/PowerPC/inverted-bool-compares.ll2
-rw-r--r--test/CodeGen/PowerPC/ispositive.ll2
-rw-r--r--test/CodeGen/PowerPC/itofp128.ll2
-rw-r--r--test/CodeGen/PowerPC/lha.ll2
-rw-r--r--test/CodeGen/PowerPC/load-constant-addr.ll4
-rw-r--r--test/CodeGen/PowerPC/long-compare.ll8
-rw-r--r--test/CodeGen/PowerPC/longdbl-truncate.ll2
-rw-r--r--test/CodeGen/PowerPC/mask64.ll2
-rw-r--r--test/CodeGen/PowerPC/mem-rr-addr-mode.ll4
-rw-r--r--test/CodeGen/PowerPC/mem_update.ll4
-rw-r--r--test/CodeGen/PowerPC/mul-neg-power-2.ll2
-rw-r--r--test/CodeGen/PowerPC/mul-with-overflow.ll2
-rw-r--r--test/CodeGen/PowerPC/mulhs.ll2
-rw-r--r--test/CodeGen/PowerPC/multiple-return-values.ll4
-rw-r--r--test/CodeGen/PowerPC/neg.ll2
-rw-r--r--test/CodeGen/PowerPC/no-dead-strip.ll2
-rw-r--r--test/CodeGen/PowerPC/or-addressing-mode.ll4
-rw-r--r--test/CodeGen/PowerPC/ppcf128-1-opt.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-1.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-2.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-3.ll2
-rw-r--r--test/CodeGen/PowerPC/ppcf128-4.ll2
-rw-r--r--test/CodeGen/PowerPC/pr3711_widen_bit.ll2
-rw-r--r--test/CodeGen/PowerPC/private.ll10
-rw-r--r--test/CodeGen/PowerPC/reg-coalesce-simple.ll2
-rw-r--r--test/CodeGen/PowerPC/retaddr.ll6
-rw-r--r--test/CodeGen/PowerPC/return-val-i128.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwimi-commute.ll4
-rw-r--r--test/CodeGen/PowerPC/rlwimi.ll4
-rw-r--r--test/CodeGen/PowerPC/rlwimi2.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwimi3.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwinm.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwinm2.ll2
-rw-r--r--test/CodeGen/PowerPC/rotl-2.ll6
-rw-r--r--test/CodeGen/PowerPC/rotl-64.ll4
-rw-r--r--test/CodeGen/PowerPC/rotl.ll4
-rw-r--r--test/CodeGen/PowerPC/sections.ll8
-rw-r--r--test/CodeGen/PowerPC/select-cc.ll2
-rw-r--r--test/CodeGen/PowerPC/select_lt0.ll2
-rw-r--r--test/CodeGen/PowerPC/setcc_no_zext.ll2
-rw-r--r--test/CodeGen/PowerPC/seteq-0.ll3
-rw-r--r--test/CodeGen/PowerPC/shift128.ll2
-rw-r--r--test/CodeGen/PowerPC/shl_elim.ll2
-rw-r--r--test/CodeGen/PowerPC/shl_sext.ll2
-rw-r--r--test/CodeGen/PowerPC/sign_ext_inreg1.ll4
-rw-r--r--test/CodeGen/PowerPC/small-arguments.ll2
-rw-r--r--test/CodeGen/PowerPC/stfiwx-2.ll4
-rw-r--r--test/CodeGen/PowerPC/stfiwx.ll8
-rw-r--r--test/CodeGen/PowerPC/store-load-fwd.ll2
-rw-r--r--test/CodeGen/PowerPC/subc.ll2
-rw-r--r--test/CodeGen/PowerPC/tailcall1-64.ll2
-rw-r--r--test/CodeGen/PowerPC/tailcall1.ll2
-rw-r--r--test/CodeGen/PowerPC/tailcallpic1.ll2
-rw-r--r--test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll583
-rw-r--r--test/CodeGen/PowerPC/trampoline.ll2
-rw-r--r--test/CodeGen/PowerPC/unsafe-math.ll4
-rw-r--r--test/CodeGen/PowerPC/vcmp-fold.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_br_cmp.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_call.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_constants.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_insert.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_misaligned.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_mul.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_perf_shuffle.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_shift.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_splat.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_vrsave.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_zero.ll2
-rw-r--r--test/CodeGen/PowerPC/vector-identity-shuffle.ll4
-rw-r--r--test/CodeGen/PowerPC/vector.ll4
-rw-r--r--test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll2
-rw-r--r--test/CodeGen/SPARC/2007-05-09-JumpTables.ll2
-rw-r--r--test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll2
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll2
-rw-r--r--test/CodeGen/SPARC/2009-08-28-PIC.ll9
-rw-r--r--test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll6
-rw-r--r--test/CodeGen/SPARC/basictest.ll2
-rw-r--r--test/CodeGen/SPARC/ctpop.ll8
-rw-r--r--test/CodeGen/SPARC/private.ll2
-rw-r--r--test/CodeGen/SPARC/xnor.ll2
-rw-r--r--test/CodeGen/SystemZ/00-RetVoid.ll6
-rw-r--r--test/CodeGen/SystemZ/01-RetArg.ll6
-rw-r--r--test/CodeGen/SystemZ/01-RetImm.ll49
-rw-r--r--test/CodeGen/SystemZ/02-MemArith.ll133
-rw-r--r--test/CodeGen/SystemZ/02-RetAdd.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetAddImm.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetAnd.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetAndImm.ll28
-rw-r--r--test/CodeGen/SystemZ/02-RetNeg.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetOr.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetOrImm.ll28
-rw-r--r--test/CodeGen/SystemZ/02-RetSub.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetSubImm.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetXor.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetXorImm.ll6
-rw-r--r--test/CodeGen/SystemZ/03-RetAddImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetAddSubreg.ll22
-rw-r--r--test/CodeGen/SystemZ/03-RetAndImmSubreg.ll38
-rw-r--r--test/CodeGen/SystemZ/03-RetAndSubreg.ll21
-rw-r--r--test/CodeGen/SystemZ/03-RetArgSubreg.ll19
-rw-r--r--test/CodeGen/SystemZ/03-RetImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetNegImmSubreg.ll8
-rw-r--r--test/CodeGen/SystemZ/03-RetOrImmSubreg.ll60
-rw-r--r--test/CodeGen/SystemZ/03-RetOrSubreg.ll23
-rw-r--r--test/CodeGen/SystemZ/03-RetSubImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetSubSubreg.ll22
-rw-r--r--test/CodeGen/SystemZ/03-RetXorImmSubreg.ll58
-rw-r--r--test/CodeGen/SystemZ/03-RetXorSubreg.ll23
-rw-r--r--test/CodeGen/SystemZ/04-RetShifts.ll121
-rw-r--r--test/CodeGen/SystemZ/05-LoadAddr.ll11
-rw-r--r--test/CodeGen/SystemZ/05-MemImmStores.ll50
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores.ll44
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores16.ll85
-rw-r--r--test/CodeGen/SystemZ/05-MemRegLoads.ll75
-rw-r--r--test/CodeGen/SystemZ/05-MemRegStores.ll79
-rw-r--r--test/CodeGen/SystemZ/06-CallViaStack.ll17
-rw-r--r--test/CodeGen/SystemZ/06-FrameIdxLoad.ll16
-rw-r--r--test/CodeGen/SystemZ/06-LocalFrame.ll13
-rw-r--r--test/CodeGen/SystemZ/06-SimpleCall.ll12
-rw-r--r--test/CodeGen/SystemZ/07-BrCond.ll141
-rw-r--r--test/CodeGen/SystemZ/07-BrCond32.ll142
-rw-r--r--test/CodeGen/SystemZ/07-BrUnCond.ll18
-rw-r--r--test/CodeGen/SystemZ/07-CmpImm.ll137
-rw-r--r--test/CodeGen/SystemZ/07-CmpImm32.ll139
-rw-r--r--test/CodeGen/SystemZ/07-SelectCC.ll11
-rw-r--r--test/CodeGen/SystemZ/08-DivRem.ll55
-rw-r--r--test/CodeGen/SystemZ/08-DivRemMemOp.ll64
-rw-r--r--test/CodeGen/SystemZ/08-SimpleMuls.ll29
-rw-r--r--test/CodeGen/SystemZ/09-DynamicAlloca.ll14
-rw-r--r--test/CodeGen/SystemZ/09-Globals.ll23
-rw-r--r--test/CodeGen/SystemZ/09-Switches.ll39
-rw-r--r--test/CodeGen/SystemZ/10-FuncsPic.ll27
-rw-r--r--test/CodeGen/SystemZ/10-GlobalsPic.ll29
-rw-r--r--test/CodeGen/SystemZ/11-BSwap.ll74
-rw-r--r--test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll12
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-And32Imm.ll14
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-Rotate.ll13
-rw-r--r--test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll19
-rw-r--r--test/CodeGen/SystemZ/2009-07-04-Shl32.ll27
-rw-r--r--test/CodeGen/SystemZ/2009-07-05-Shifts.ll25
-rw-r--r--test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll22
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll16
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll32
-rw-r--r--test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll21
-rw-r--r--test/CodeGen/SystemZ/2009-08-22-FCopySign.ll22
-rw-r--r--test/CodeGen/SystemZ/dg.exp5
-rw-r--r--test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll2
-rw-r--r--test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll2
-rw-r--r--test/CodeGen/Thumb/2007-03-06-AddR7.ll4
-rw-r--r--test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll2
-rw-r--r--test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll2
-rw-r--r--test/CodeGen/Thumb/2009-07-19-SPDecBug.ll33
-rw-r--r--test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll11
-rw-r--r--test/CodeGen/Thumb/2009-07-27-PEIAssert.ll26
-rw-r--r--test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll737
-rw-r--r--test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll40
-rw-r--r--test/CodeGen/Thumb/2009-08-20-ISelBug.ll66
-rw-r--r--test/CodeGen/Thumb/asmprinter-bug.ll288
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll6
-rw-r--r--test/CodeGen/Thumb/fpconv.ll2
-rw-r--r--test/CodeGen/Thumb/fpow.ll2
-rw-r--r--test/CodeGen/Thumb/frame_thumb.ll4
-rw-r--r--test/CodeGen/Thumb/iabs.ll2
-rw-r--r--test/CodeGen/Thumb/inlineasm-imm-thumb.ll2
-rw-r--r--test/CodeGen/Thumb/ispositive.ll4
-rw-r--r--test/CodeGen/Thumb/large-stack.ll2
-rw-r--r--test/CodeGen/Thumb/ldr_ext.ll51
-rw-r--r--test/CodeGen/Thumb/ldr_frame.ll12
-rw-r--r--test/CodeGen/Thumb/long-setcc.ll2
-rw-r--r--test/CodeGen/Thumb/long.ll8
-rw-r--r--test/CodeGen/Thumb/long_shift.ll26
-rw-r--r--test/CodeGen/Thumb/mul.ll22
-rw-r--r--test/CodeGen/Thumb/pop.ll13
-rw-r--r--test/CodeGen/Thumb/push.ll10
-rw-r--r--test/CodeGen/Thumb/select.ll14
-rw-r--r--test/CodeGen/Thumb/stack-frame.ll4
-rw-r--r--test/CodeGen/Thumb/thumb-imm.ll2
-rw-r--r--test/CodeGen/Thumb/tst_teq.ll2
-rw-r--r--test/CodeGen/Thumb/unord.ll4
-rw-r--r--test/CodeGen/Thumb/vargs.ll6
-rw-r--r--test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll35
-rw-r--r--test/CodeGen/Thumb2/2009-07-21-ISelBug.ll36
-rw-r--r--test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll22
-rw-r--r--test/CodeGen/Thumb2/2009-07-30-PEICrash.ll193
-rw-r--r--test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll85
-rw-r--r--test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll46
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll29
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll153
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll508
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll34
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll42
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll54
-rw-r--r--test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll24
-rw-r--r--test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll16
-rw-r--r--test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll80
-rw-r--r--test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll20
-rw-r--r--test/CodeGen/Thumb2/2009-08-10-ISelBug.ll15
-rw-r--r--test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll154
-rw-r--r--test/CodeGen/Thumb2/carry.ll12
-rw-r--r--test/CodeGen/Thumb2/frameless.ll6
-rw-r--r--test/CodeGen/Thumb2/frameless2.ll12
-rw-r--r--test/CodeGen/Thumb2/large-stack.ll28
-rw-r--r--test/CodeGen/Thumb2/load-global.ll28
-rw-r--r--test/CodeGen/Thumb2/mul_const.ll18
-rw-r--r--test/CodeGen/Thumb2/pic-load.ll21
-rw-r--r--test/CodeGen/Thumb2/thumb2-adc.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-add.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-add2.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-add3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add4.ll17
-rw-r--r--test/CodeGen/Thumb2/thumb2-add5.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-add6.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-and.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-and2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-asr.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-asr2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-bcc.ll19
-rw-r--r--test/CodeGen/Thumb2/thumb2-bfc.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-bic.ll56
-rw-r--r--test/CodeGen/Thumb2/thumb2-branch.ll61
-rw-r--r--test/CodeGen/Thumb2/thumb2-call.ll27
-rw-r--r--test/CodeGen/Thumb2/thumb2-clz.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-eor.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-eor2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1.ll84
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll93
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt3.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-jtb.ll120
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldm.ll40
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr.ll23
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_ext.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_post.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_pre.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrb.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrd.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrh.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsl.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsl2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr3.ll19
-rw-r--r--test/CodeGen/Thumb2/thumb2-mla.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mls.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov.ll62
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov2.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov3.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov4.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mul.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-mulhi.ll20
-rw-r--r--test/CodeGen/Thumb2/thumb2-mvn.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-mvn2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-neg.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-orn.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-orn2.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-orr.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-orr2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-pack.ll73
-rw-r--r--test/CodeGen/Thumb2/thumb2-rev.ll17
-rw-r--r--test/CodeGen/Thumb2/thumb2-rev16.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-ror.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ror2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-rsb.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-rsb2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sbc.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-select.ll98
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll24
-rw-r--r--test/CodeGen/Thumb2/thumb2-shifter.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-smla.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-smul.ll23
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll57
-rw-r--r--test/CodeGen/Thumb2/thumb2-str.ll25
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_post.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_pre.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-strb.ll25
-rw-r--r--test/CodeGen/Thumb2/thumb2-strh.ll25
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub.ll20
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub4.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub5.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-sxt_rot.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbb.ll57
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbh.ll90
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq2.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst2.ll22
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxt_rot.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxtb.ll2
-rw-r--r--test/CodeGen/Thumb2/tls1.ll6
-rw-r--r--test/CodeGen/Thumb2/tls2.ll22
-rw-r--r--test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll2
-rw-r--r--test/CodeGen/X86/2003-08-23-DeadBlockTest.ll2
-rw-r--r--test/CodeGen/X86/2003-11-03-GlobalBool.ll2
-rw-r--r--test/CodeGen/X86/2004-02-12-Memcpy.ll2
-rw-r--r--test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll4
-rw-r--r--test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll2
-rw-r--r--test/CodeGen/X86/2004-02-22-Casts.ll2
-rw-r--r--test/CodeGen/X86/2004-03-30-Select-Max.ll2
-rw-r--r--test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll2
-rw-r--r--test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll2
-rw-r--r--test/CodeGen/X86/2004-06-10-StackifierCrash.ll2
-rw-r--r--test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll2
-rw-r--r--test/CodeGen/X86/2005-01-17-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2005-02-14-IllegalAssembler.ll2
-rw-r--r--test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll2
-rw-r--r--test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2006-03-01-InstrSchedBug.ll2
-rw-r--r--test/CodeGen/X86/2006-03-02-InstrSchedBug.ll2
-rw-r--r--test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll2
-rw-r--r--test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll3
-rw-r--r--test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll2
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched1.ll3
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched2.ll2
-rw-r--r--test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll2
-rw-r--r--test/CodeGen/X86/2006-05-08-InstrSched.ll3
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2006-05-17-VectorArg.ll2
-rw-r--r--test/CodeGen/X86/2006-05-22-FPSetEQ.ll4
-rw-r--r--test/CodeGen/X86/2006-05-25-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll2
-rw-r--r--test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll2
-rw-r--r--test/CodeGen/X86/2006-07-19-ATTAsm.ll2
-rw-r--r--test/CodeGen/X86/2006-07-20-InlineAsm.ll2
-rw-r--r--test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll2
-rw-r--r--test/CodeGen/X86/2006-07-31-SingleRegClass.ll2
-rw-r--r--test/CodeGen/X86/2006-08-07-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-08-16-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-08-21-ExtraMovInst.ll2
-rw-r--r--test/CodeGen/X86/2006-09-01-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-02-BoolRetCrash.ll2
-rw-r--r--test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll2
-rw-r--r--test/CodeGen/X86/2006-10-09-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll2
-rw-r--r--test/CodeGen/X86/2006-10-12-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-13-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll9
-rw-r--r--test/CodeGen/X86/2006-11-12-CSRetCC.ll2
-rw-r--r--test/CodeGen/X86/2006-11-17-IllegalMove.ll4
-rw-r--r--test/CodeGen/X86/2006-11-27-SelectLegalize.ll2
-rw-r--r--test/CodeGen/X86/2006-11-28-Memcpy.ll6
-rw-r--r--test/CodeGen/X86/2006-12-19-IntelSyntax.ll2
-rw-r--r--test/CodeGen/X86/2007-01-08-InstrSched.ll11
-rw-r--r--test/CodeGen/X86/2007-01-13-StackPtrIndex.ll2
-rw-r--r--test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll2
-rw-r--r--test/CodeGen/X86/2007-02-04-OrAddrMode.ll4
-rw-r--r--test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-02-25-FastCCStack.ll2
-rw-r--r--test/CodeGen/X86/2007-03-01-SpillerCrash.ll4
-rw-r--r--test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll2
-rw-r--r--test/CodeGen/X86/2007-03-16-InlineAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll2
-rw-r--r--test/CodeGen/X86/2007-03-26-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll4
-rw-r--r--test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll2
-rw-r--r--test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-04-24-Huge-Stack.ll2
-rw-r--r--test/CodeGen/X86/2007-04-24-VectorCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll4
-rw-r--r--test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll2
-rw-r--r--test/CodeGen/X86/2007-05-05-VecCastExpand.ll2
-rw-r--r--test/CodeGen/X86/2007-05-07-InvokeSRet.ll2
-rw-r--r--test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-05-15-maskmovq.ll2
-rw-r--r--test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll4
-rw-r--r--test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll4
-rw-r--r--test/CodeGen/X86/2007-06-04-tailmerge4.ll2
-rw-r--r--test/CodeGen/X86/2007-06-05-LSR-Dominator.ll2
-rw-r--r--test/CodeGen/X86/2007-06-14-branchfold.ll2
-rw-r--r--test/CodeGen/X86/2007-06-15-IntToMMX.ll2
-rw-r--r--test/CodeGen/X86/2007-06-28-X86-64-isel.ll2
-rw-r--r--test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll2
-rw-r--r--test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll2
-rw-r--r--test/CodeGen/X86/2007-07-03-GR64ToVR64.ll6
-rw-r--r--test/CodeGen/X86/2007-07-10-StackerAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-07-18-Vector-Extract.ll4
-rw-r--r--test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll2
-rw-r--r--test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll2
-rw-r--r--test/CodeGen/X86/2007-08-10-SignExtSubreg.ll2
-rw-r--r--test/CodeGen/X86/2007-08-13-AppendingLinkage.ll2
-rw-r--r--test/CodeGen/X86/2007-08-13-SpillerReuse.ll2
-rw-r--r--test/CodeGen/X86/2007-09-05-InvalidAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll2
-rw-r--r--test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll2
-rw-r--r--test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll2
-rw-r--r--test/CodeGen/X86/2007-09-27-LDIntrinsics.ll4
-rw-r--r--test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll2
-rw-r--r--test/CodeGen/X86/2007-10-05-3AddrConvert.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll2
-rw-r--r--test/CodeGen/X86/2007-10-14-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-15-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-16-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-16-IllegalAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-10-16-fp80_select.ll2
-rw-r--r--test/CodeGen/X86/2007-10-17-IllegalAsm.ll4
-rw-r--r--test/CodeGen/X86/2007-10-19-SpillerUnfold.ll2
-rw-r--r--test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll2
-rw-r--r--test/CodeGen/X86/2007-10-29-ExtendSetCC.ll2
-rw-r--r--test/CodeGen/X86/2007-10-30-LSRCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-31-extractelement-i64.ll2
-rw-r--r--test/CodeGen/X86/2007-11-01-ISelCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-11-02-BadAsm.ll2
-rw-r--r--test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll2
-rw-r--r--test/CodeGen/X86/2007-11-06-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2007-11-07-MulBy4.ll2
-rw-r--r--test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll5
-rw-r--r--test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll2
-rw-r--r--test/CodeGen/X86/2007-11-30-TestLoadFolding.ll4
-rw-r--r--test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll2
-rw-r--r--test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-12-18-LoadCSEBug.ll2
-rw-r--r--test/CodeGen/X86/2008-01-08-IllegalCMP.ll2
-rw-r--r--test/CodeGen/X86/2008-01-08-SchedulerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-01-09-LongDoubleSin.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-Trampoline.ll4
-rw-r--r--test/CodeGen/X86/2008-01-25-EmptyFunction.ll2
-rw-r--r--test/CodeGen/X86/2008-02-05-ISelCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-14-BitMiscompile.ll2
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll4
-rw-r--r--test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-22-ReMatBug.ll3
-rw-r--r--test/CodeGen/X86/2008-02-25-InlineAsmBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll2
-rw-r--r--test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-27-PEICrash.ll2
-rw-r--r--test/CodeGen/X86/2008-03-06-frem-fpstack.ll2
-rw-r--r--test/CodeGen/X86/2008-03-07-APIntBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll4
-rw-r--r--test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll2
-rw-r--r--test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-03-14-SpillerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-03-18-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll2
-rw-r--r--test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-02-unnamedEH.ll3
-rw-r--r--test/CodeGen/X86/2008-04-08-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-04-09-BranchFolding.ll2
-rw-r--r--test/CodeGen/X86/2008-04-15-LiveVariableBug.ll4
-rw-r--r--test/CodeGen/X86/2008-04-16-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-16-ReMatBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-17-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-24-MemCpyBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll2
-rw-r--r--test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll2
-rw-r--r--test/CodeGen/X86/2008-04-28-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll2
-rw-r--r--test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll2
-rw-r--r--test/CodeGen/X86/2008-05-09-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-12-tailmerge-5.ll2
-rw-r--r--test/CodeGen/X86/2008-05-21-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll2
-rw-r--r--test/CodeGen/X86/2008-05-28-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll6
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll4
-rw-r--r--test/CodeGen/X86/2008-06-16-SubregsBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-18-BadShuffle.ll2
-rw-r--r--test/CodeGen/X86/2008-06-25-VecISelBug.ll2
-rw-r--r--test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll2
-rw-r--r--test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll2
-rw-r--r--test/CodeGen/X86/2008-07-11-SHLBy1.ll2
-rw-r--r--test/CodeGen/X86/2008-07-11-SpillerBug.ll9
-rw-r--r--test/CodeGen/X86/2008-07-16-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-07-19-movups-spills.ll4
-rw-r--r--test/CodeGen/X86/2008-07-22-CombinerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-07-23-VSetCC.ll6
-rw-r--r--test/CodeGen/X86/2008-08-05-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-06-RewriterBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-19-SubAndFetch.ll5
-rw-r--r--test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll2
-rw-r--r--test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll5
-rw-r--r--test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll4
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN32.ll4
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN64.ll4
-rw-r--r--test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll4
-rw-r--r--test/CodeGen/X86/2008-09-09-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug2.ll2
-rw-r--r--test/CodeGen/X86/2008-09-17-inline-asm-1.ll16
-rw-r--r--test/CodeGen/X86/2008-09-18-inline-asm-2.ll4
-rw-r--r--test/CodeGen/X86/2008-09-19-RegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-25-sseregparm-1.ll4
-rw-r--r--test/CodeGen/X86/2008-09-26-FrameAddrBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-29-ReMatBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-29-VolatileBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-02-Atomics32-2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-MMXISelBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-07-SSEISelBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-11-CallCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-10-13-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-16-VecUnaryOp.ll2
-rw-r--r--test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll4
-rw-r--r--test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll4
-rw-r--r--test/CodeGen/X86/2008-10-24-FlippedCompare.ll2
-rw-r--r--test/CodeGen/X86/2008-10-27-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-27-StackRealignment.ll4
-rw-r--r--test/CodeGen/X86/2008-10-29-ExpandVAARG.ll2
-rw-r--r--test/CodeGen/X86/2008-11-03-F80VAARG.ll2
-rw-r--r--test/CodeGen/X86/2008-11-06-testb.ll2
-rw-r--r--test/CodeGen/X86/2008-11-13-inlineasm-3.ll2
-rw-r--r--test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll3
-rw-r--r--test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll3
-rw-r--r--test/CodeGen/X86/2008-11-29-ULT-Sign.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-SpillerAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-IllegalResultType.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-1.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-2.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-3.ll4
-rw-r--r--test/CodeGen/X86/2008-12-05-SpillerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll12
-rw-r--r--test/CodeGen/X86/2008-12-16-BadShift.ll2
-rw-r--r--test/CodeGen/X86/2008-12-16-dagcombine-4.ll2
-rw-r--r--test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll7
-rw-r--r--test/CodeGen/X86/2008-12-22-dagcombine-5.ll2
-rw-r--r--test/CodeGen/X86/2008-12-23-crazy-address.ll2
-rw-r--r--test/CodeGen/X86/2008-12-23-dagcombine-6.ll2
-rw-r--r--test/CodeGen/X86/2009-01-12-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-13-DoubleUpdate.ll2
-rw-r--r--test/CodeGen/X86/2009-01-16-SchedulerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-16-UIntToFP.ll2
-rw-r--r--test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-01-25-NoSSE.ll2
-rw-r--r--test/CodeGen/X86/2009-01-26-WrongCheck.ll2
-rw-r--r--test/CodeGen/X86/2009-01-27-NullStrings.ll39
-rw-r--r--test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift2.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift3.ll2
-rw-r--r--test/CodeGen/X86/2009-02-01-LargeMask.ll2
-rw-r--r--test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll2
-rw-r--r--test/CodeGen/X86/2009-02-04-sext-i64-gep.ll2
-rw-r--r--test/CodeGen/X86/2009-02-05-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2009-02-07-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-08-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll2
-rw-r--r--test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll4
-rw-r--r--test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll4
-rw-r--r--test/CodeGen/X86/2009-02-12-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll2
-rw-r--r--test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll2
-rw-r--r--test/CodeGen/X86/2009-02-25-CommuteBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-03-BTHang.ll2
-rw-r--r--test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll2
-rw-r--r--test/CodeGen/X86/2009-03-05-burr-list-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-03-07-FPConstSelect.ll2
-rw-r--r--test/CodeGen/X86/2009-03-09-APIntCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-03-09-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-10-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-11-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-12-CPAlignBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-13-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll2
-rw-r--r--test/CodeGen/X86/2009-03-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-i80-fp80.ll4
-rw-r--r--test/CodeGen/X86/2009-03-25-TestBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll4
-rw-r--r--test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-04-12-picrel.ll2
-rw-r--r--test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll2
-rw-r--r--test/CodeGen/X86/2009-04-13-2AddrAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-14-IllegalRegs.ll2
-rw-r--r--test/CodeGen/X86/2009-04-16-SpillerUnfold.ll2
-rw-r--r--test/CodeGen/X86/2009-04-20-LinearScanOpt.ll2
-rw-r--r--test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll10
-rw-r--r--test/CodeGen/X86/2009-04-24.ll4
-rw-r--r--test/CodeGen/X86/2009-04-25-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll2
-rw-r--r--test/CodeGen/X86/2009-04-29-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-29-RegAllocAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-scale.ll2
-rw-r--r--test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll2
-rw-r--r--test/CodeGen/X86/2009-05-11-tailmerge-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll2
-rw-r--r--test/CodeGen/X86/2009-05-23-available_externally.ll2
-rw-r--r--test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll2
-rw-r--r--test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-05-30-ISelBug.ll2
-rw-r--r--test/CodeGen/X86/2009-06-02-RewriterBug.ll2
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll2
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll8
-rw-r--r--test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-VZextByteShort.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-sitofpCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-06-06-ConcatVectors.ll2
-rw-r--r--test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll2
-rw-r--r--test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll4
-rw-r--r--test/CodeGen/X86/2009-06-15-not-a-tail-call.ll2
-rw-r--r--test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll2
-rw-r--r--test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll137
-rw-r--r--test/CodeGen/X86/2009-07-07-SplitICmp.ll8
-rw-r--r--test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll11
-rw-r--r--test/CodeGen/X86/2009-07-15-CoalescerBug.ll958
-rw-r--r--test/CodeGen/X86/2009-07-16-CoalescerBug.ll210
-rw-r--r--test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll102
-rw-r--r--test/CodeGen/X86/2009-07-17-StackColoringBug.ll55
-rw-r--r--test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll11
-rw-r--r--test/CodeGen/X86/2009-07-20-CoalescerBug.ll165
-rw-r--r--test/CodeGen/X86/2009-07-20-DAGCombineBug.ll29
-rw-r--r--test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll10
-rw-r--r--test/CodeGen/X86/2009-08-06-branchfolder-crash.ll142
-rw-r--r--test/CodeGen/X86/2009-08-06-inlineasm.ll26
-rw-r--r--test/CodeGen/X86/2009-08-08-CastError.ll9
-rw-r--r--test/CodeGen/X86/2009-08-12-badswitch.ll176
-rw-r--r--test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll57
-rw-r--r--test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll15
-rw-r--r--test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll69
-rw-r--r--test/CodeGen/X86/2009-08-23-linkerprivate.ll8
-rw-r--r--test/CodeGen/X86/2009-09-07-CoalescerBug.ll48
-rw-r--r--test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll48
-rw-r--r--test/CodeGen/X86/2009-09-16-CoalescerBug.ll64
-rw-r--r--test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll30
-rw-r--r--test/CodeGen/X86/2009-09-19-earlyclobber.ll15
-rw-r--r--test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll36
-rw-r--r--test/CodeGen/X86/2009-09-22-CoalescerBug.ll124
-rw-r--r--test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll91
-rw-r--r--test/CodeGen/X86/2009-10-08-MachineLICMBug.ll264
-rw-r--r--test/CodeGen/X86/20090313-signext.ll2
-rw-r--r--test/CodeGen/X86/Atomics-32.ll2
-rw-r--r--test/CodeGen/X86/Atomics-64.ll2
-rw-r--r--test/CodeGen/X86/abi-isel.ll8710
-rw-r--r--test/CodeGen/X86/add-trick32.ll2
-rw-r--r--test/CodeGen/X86/add-trick64.ll2
-rw-r--r--test/CodeGen/X86/add-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/aliases.ll3
-rw-r--r--test/CodeGen/X86/aligned-comm.ll8
-rw-r--r--test/CodeGen/X86/all-ones-vector.ll2
-rw-r--r--test/CodeGen/X86/alloca-align-rounding.ll4
-rw-r--r--test/CodeGen/X86/and-or-fold.ll2
-rw-r--r--test/CodeGen/X86/and-su.ll2
-rw-r--r--test/CodeGen/X86/anyext-uses.ll2
-rw-r--r--test/CodeGen/X86/anyext.ll18
-rw-r--r--test/CodeGen/X86/arg-cast.ll6
-rw-r--r--test/CodeGen/X86/asm-block-labels.ll2
-rw-r--r--test/CodeGen/X86/asm-global-imm.ll4
-rw-r--r--test/CodeGen/X86/asm-indirect-mem.ll2
-rw-r--r--test/CodeGen/X86/asm-modifier-P.ll79
-rw-r--r--test/CodeGen/X86/asm-modifier.ll41
-rw-r--r--test/CodeGen/X86/atomic_add.ll217
-rw-r--r--test/CodeGen/X86/atomic_op.ll2
-rw-r--r--test/CodeGen/X86/attribute-sections.ll18
-rw-r--r--test/CodeGen/X86/avoid-lea-scale2.ll8
-rw-r--r--test/CodeGen/X86/avoid-loop-align-2.ll2
-rw-r--r--test/CodeGen/X86/avoid-loop-align.ll2
-rw-r--r--test/CodeGen/X86/bitcast-int-to-vector.ll2
-rw-r--r--test/CodeGen/X86/bitcast.ll4
-rw-r--r--test/CodeGen/X86/bitcast2.ll4
-rw-r--r--test/CodeGen/X86/break-anti-dependencies.ll4
-rw-r--r--test/CodeGen/X86/bss_pagealigned.ll21
-rw-r--r--test/CodeGen/X86/bswap-inline-asm.ll2
-rw-r--r--test/CodeGen/X86/bswap.ll4
-rw-r--r--test/CodeGen/X86/bt.ll6
-rw-r--r--test/CodeGen/X86/byval.ll4
-rw-r--r--test/CodeGen/X86/byval2.ll4
-rw-r--r--test/CodeGen/X86/byval3.ll4
-rw-r--r--test/CodeGen/X86/byval4.ll4
-rw-r--r--test/CodeGen/X86/byval5.ll4
-rw-r--r--test/CodeGen/X86/byval6.ll2
-rw-r--r--test/CodeGen/X86/byval7.ll2
-rw-r--r--test/CodeGen/X86/call-imm.ll8
-rw-r--r--test/CodeGen/X86/call-push.ll2
-rw-r--r--test/CodeGen/X86/change-compare-stride-0.ll4
-rw-r--r--test/CodeGen/X86/change-compare-stride-1.ll2
-rw-r--r--test/CodeGen/X86/clz.ll6
-rw-r--r--test/CodeGen/X86/cmov.ll157
-rw-r--r--test/CodeGen/X86/cmp-test.ll4
-rw-r--r--test/CodeGen/X86/cmp0.ll2
-rw-r--r--test/CodeGen/X86/cmp1.ll2
-rw-r--r--test/CodeGen/X86/cmp2.ll2
-rw-r--r--test/CodeGen/X86/coalesce-esp.ll36
-rw-r--r--test/CodeGen/X86/coalescer-commute1.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute2.ll4
-rw-r--r--test/CodeGen/X86/coalescer-commute3.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute4.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute5.ll2
-rw-r--r--test/CodeGen/X86/coalescer-cross.ll41
-rw-r--r--test/CodeGen/X86/coalescer-remat.ll2
-rw-r--r--test/CodeGen/X86/code_placement.ll4
-rw-r--r--test/CodeGen/X86/codegen-prepare-cast.ll2
-rw-r--r--test/CodeGen/X86/codemodel.ll67
-rw-r--r--test/CodeGen/X86/combine-lds.ll2
-rw-r--r--test/CodeGen/X86/combiner-aa-0.ll20
-rw-r--r--test/CodeGen/X86/combiner-aa-1.ll23
-rw-r--r--test/CodeGen/X86/commute-intrinsic.ll2
-rw-r--r--test/CodeGen/X86/commute-two-addr.ll2
-rw-r--r--test/CodeGen/X86/compare-add.ll2
-rw-r--r--test/CodeGen/X86/compare-inf.ll76
-rw-r--r--test/CodeGen/X86/compare_folding.ll4
-rw-r--r--test/CodeGen/X86/compiler_used.ll9
-rw-r--r--test/CodeGen/X86/complex-fca.ll2
-rw-r--r--test/CodeGen/X86/const-select.ll4
-rw-r--r--test/CodeGen/X86/constant-pool-remat-0.ll8
-rw-r--r--test/CodeGen/X86/constpool.ll8
-rw-r--r--test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll2
-rw-r--r--test/CodeGen/X86/copysign-zero.ll4
-rw-r--r--test/CodeGen/X86/critical-edge-split.ll2
-rw-r--r--test/CodeGen/X86/cstring.ll2
-rw-r--r--test/CodeGen/X86/dag-rauw-cse.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-buildvector.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-cse.ll2
-rw-r--r--test/CodeGen/X86/darwin-bzero.ll2
-rw-r--r--test/CodeGen/X86/darwin-no-dead-strip.ll2
-rw-r--r--test/CodeGen/X86/darwin-quote.ll15
-rw-r--r--test/CodeGen/X86/darwin-stub.ll4
-rw-r--r--test/CodeGen/X86/div_const.ll2
-rw-r--r--test/CodeGen/X86/divrem.ll2
-rw-r--r--test/CodeGen/X86/dll-linkage.ll9
-rw-r--r--test/CodeGen/X86/dollar-name.ll7
-rw-r--r--test/CodeGen/X86/dyn-stackalloc.ll8
-rw-r--r--test/CodeGen/X86/empty-struct-return-type.ll15
-rw-r--r--test/CodeGen/X86/epilogue.ll4
-rw-r--r--test/CodeGen/X86/extend.ll4
-rw-r--r--test/CodeGen/X86/extern_weak.ll2
-rw-r--r--test/CodeGen/X86/extmul128.ll2
-rw-r--r--test/CodeGen/X86/extmul64.ll2
-rw-r--r--test/CodeGen/X86/extract-combine.ll2
-rw-r--r--test/CodeGen/X86/extract-extract.ll24
-rw-r--r--test/CodeGen/X86/extractelement-from-arg.ll2
-rw-r--r--test/CodeGen/X86/extractelement-load.ll4
-rw-r--r--test/CodeGen/X86/extractelement-shuffle.ll2
-rw-r--r--test/CodeGen/X86/extractps.ll2
-rw-r--r--test/CodeGen/X86/fabs.ll5
-rw-r--r--test/CodeGen/X86/fast-cc-callee-pops.ll3
-rw-r--r--test/CodeGen/X86/fast-cc-merge-stack-adj.ll2
-rw-r--r--test/CodeGen/X86/fast-cc-pass-in-regs.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-bail.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-bc.ll19
-rw-r--r--test/CodeGen/X86/fast-isel-call.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-constpool.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-fneg.ll16
-rw-r--r--test/CodeGen/X86/fast-isel-gep.ll53
-rw-r--r--test/CodeGen/X86/fast-isel-gv.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-i1.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-mem.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-phys.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-shift-imm.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-tailcall.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-tls.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-trunc.ll4
-rw-r--r--test/CodeGen/X86/fast-isel.ll11
-rw-r--r--test/CodeGen/X86/fastcall-correct-mangling.ll2
-rw-r--r--test/CodeGen/X86/fastcc-2.ll4
-rw-r--r--test/CodeGen/X86/fastcc-byval.ll2
-rw-r--r--test/CodeGen/X86/fastcc-sret.ll2
-rw-r--r--test/CodeGen/X86/fastcc.ll5
-rw-r--r--test/CodeGen/X86/field-extract-use-trunc.ll4
-rw-r--r--test/CodeGen/X86/fildll.ll2
-rw-r--r--test/CodeGen/X86/fmul-zero.ll4
-rw-r--r--test/CodeGen/X86/fold-add.ll2
-rw-r--r--test/CodeGen/X86/fold-and-shift.ll2
-rw-r--r--test/CodeGen/X86/fold-call-2.ll2
-rw-r--r--test/CodeGen/X86/fold-call-3.ll2
-rw-r--r--test/CodeGen/X86/fold-call.ll4
-rw-r--r--test/CodeGen/X86/fold-imm.ll4
-rw-r--r--test/CodeGen/X86/fold-load.ll2
-rw-r--r--test/CodeGen/X86/fold-mul-lohi.ll4
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-0.ll6
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-1.ll2
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-2.ll4
-rw-r--r--test/CodeGen/X86/fold-sext-trunc.ll2
-rw-r--r--test/CodeGen/X86/fp-immediate-shorten.ll2
-rw-r--r--test/CodeGen/X86/fp-in-intregs.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-2results.ll4
-rw-r--r--test/CodeGen/X86/fp-stack-O0-crash.ll30
-rw-r--r--test/CodeGen/X86/fp-stack-compare.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-direct-ret.ll4
-rw-r--r--test/CodeGen/X86/fp-stack-ret-conv.ll6
-rw-r--r--test/CodeGen/X86/fp-stack-ret-store.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-ret.ll3
-rw-r--r--test/CodeGen/X86/fp-stack-retcopy.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-set-st1.ll2
-rw-r--r--test/CodeGen/X86/fp2sint.ll2
-rw-r--r--test/CodeGen/X86/fp_constant_op.ll2
-rw-r--r--test/CodeGen/X86/fp_load_cast_fold.ll2
-rw-r--r--test/CodeGen/X86/fp_load_fold.ll2
-rw-r--r--test/CodeGen/X86/fsxor-alignment.ll2
-rw-r--r--test/CodeGen/X86/full-lsr.ll2
-rw-r--r--test/CodeGen/X86/ga-offset.ll4
-rw-r--r--test/CodeGen/X86/global-sections-tls.ll14
-rw-r--r--test/CodeGen/X86/global-sections.ll123
-rw-r--r--test/CodeGen/X86/h-register-addressing-32.ll2
-rw-r--r--test/CodeGen/X86/h-register-addressing-64.ll2
-rw-r--r--test/CodeGen/X86/h-register-store.ll4
-rw-r--r--test/CodeGen/X86/h-registers-0.ll4
-rw-r--r--test/CodeGen/X86/h-registers-1.ll2
-rw-r--r--test/CodeGen/X86/h-registers-2.ll2
-rw-r--r--test/CodeGen/X86/h-registers-3.ll4
-rw-r--r--test/CodeGen/X86/hidden-vis-2.ll4
-rw-r--r--test/CodeGen/X86/hidden-vis-3.ll12
-rw-r--r--test/CodeGen/X86/hidden-vis-4.ll7
-rw-r--r--test/CodeGen/X86/hidden-vis.ll20
-rw-r--r--test/CodeGen/X86/i128-and-beyond.ll2
-rw-r--r--test/CodeGen/X86/i128-immediate.ll2
-rw-r--r--test/CodeGen/X86/i128-mul.ll2
-rw-r--r--test/CodeGen/X86/i128-ret.ll4
-rw-r--r--test/CodeGen/X86/i256-add.ll2
-rw-r--r--test/CodeGen/X86/i2k.ll2
-rw-r--r--test/CodeGen/X86/i64-mem-copy.ll4
-rw-r--r--test/CodeGen/X86/iabs.ll2
-rw-r--r--test/CodeGen/X86/illegal-asm.ll4
-rw-r--r--test/CodeGen/X86/illegal-insert.ll2
-rw-r--r--test/CodeGen/X86/illegal-vector-args-return.ll8
-rw-r--r--test/CodeGen/X86/imp-def-copies.ll2
-rw-r--r--test/CodeGen/X86/imul-lea-2.ll6
-rw-r--r--test/CodeGen/X86/imul-lea.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-2addr.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-R-constraint.ll18
-rw-r--r--test/CodeGen/X86/inline-asm-flag-clobber.ll4
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack2.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack3.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack4.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack5.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-modifier-n.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-mrv.ll8
-rw-r--r--test/CodeGen/X86/inline-asm-out-regs.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-pic.ll4
-rw-r--r--test/CodeGen/X86/inline-asm-q-regs.ll10
-rw-r--r--test/CodeGen/X86/inline-asm-tied.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-x-scalar.ll2
-rw-r--r--test/CodeGen/X86/inline-asm.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-1.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-3.ll4
-rw-r--r--test/CodeGen/X86/insertelement-copytoregs.ll2
-rw-r--r--test/CodeGen/X86/insertelement-legalize.ll2
-rw-r--r--test/CodeGen/X86/invalid-shift-immediate.ll2
-rw-r--r--test/CodeGen/X86/isel-sink.ll4
-rw-r--r--test/CodeGen/X86/isel-sink2.ll2
-rw-r--r--test/CodeGen/X86/isel-sink3.ll4
-rw-r--r--test/CodeGen/X86/isint.ll2
-rw-r--r--test/CodeGen/X86/isnan.ll2
-rw-r--r--test/CodeGen/X86/isnan2.ll2
-rw-r--r--test/CodeGen/X86/ispositive.ll2
-rw-r--r--test/CodeGen/X86/iv-users-in-other-loops.ll6
-rw-r--r--test/CodeGen/X86/jump_sign.ll2
-rw-r--r--test/CodeGen/X86/ldzero.ll2
-rw-r--r--test/CodeGen/X86/lea-2.ll4
-rw-r--r--test/CodeGen/X86/lea-3.ll4
-rw-r--r--test/CodeGen/X86/lea-4.ll2
-rw-r--r--test/CodeGen/X86/lea-recursion.ll2
-rw-r--r--test/CodeGen/X86/lea.ll35
-rw-r--r--test/CodeGen/X86/legalizedag_vec.ll6
-rw-r--r--test/CodeGen/X86/lfence.ll2
-rw-r--r--test/CodeGen/X86/limited-prec.ll6
-rw-r--r--test/CodeGen/X86/live-out-reg-info.ll2
-rw-r--r--test/CodeGen/X86/local-liveness.ll2
-rw-r--r--test/CodeGen/X86/long-setcc.ll6
-rw-r--r--test/CodeGen/X86/longlong-deadload.ll2
-rw-r--r--test/CodeGen/X86/loop-hoist.ll14
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-2.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-3.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce2.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce3.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce5.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce6.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce7.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce8.ll2
-rw-r--r--test/CodeGen/X86/lsr-loop-exit-cond.ll5
-rw-r--r--test/CodeGen/X86/lsr-negative-stride.ll2
-rw-r--r--test/CodeGen/X86/lsr-sort.ll2
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll2
-rw-r--r--test/CodeGen/X86/masked-iv-unsafe.ll2
-rw-r--r--test/CodeGen/X86/maskmovdqu.ll4
-rw-r--r--test/CodeGen/X86/memcpy-2.ll4
-rw-r--r--test/CodeGen/X86/memcpy.ll2
-rw-r--r--test/CodeGen/X86/memmove-0.ll2
-rw-r--r--test/CodeGen/X86/memmove-1.ll2
-rw-r--r--test/CodeGen/X86/memmove-2.ll2
-rw-r--r--test/CodeGen/X86/memmove-3.ll2
-rw-r--r--test/CodeGen/X86/memmove-4.ll2
-rw-r--r--test/CodeGen/X86/memset-2.ll6
-rw-r--r--test/CodeGen/X86/memset.ll4
-rw-r--r--test/CodeGen/X86/memset64-on-x86-32.ll4
-rw-r--r--test/CodeGen/X86/mfence.ll6
-rw-r--r--test/CodeGen/X86/mingw-alloca.ll19
-rw-r--r--test/CodeGen/X86/mmx-arg-passing.ll10
-rw-r--r--test/CodeGen/X86/mmx-arg-passing2.ll4
-rw-r--r--test/CodeGen/X86/mmx-arith.ll2
-rw-r--r--test/CodeGen/X86/mmx-bitcast-to-i64.ll2
-rw-r--r--test/CodeGen/X86/mmx-copy-gprs.ll6
-rw-r--r--test/CodeGen/X86/mmx-emms.ll2
-rw-r--r--test/CodeGen/X86/mmx-insert-element.ll4
-rw-r--r--test/CodeGen/X86/mmx-pinsrw.ll2
-rw-r--r--test/CodeGen/X86/mmx-punpckhdq.ll2
-rw-r--r--test/CodeGen/X86/mmx-s2v.ll2
-rw-r--r--test/CodeGen/X86/mmx-shift.ll8
-rw-r--r--test/CodeGen/X86/mmx-shuffle.ll2
-rw-r--r--test/CodeGen/X86/mmx-vzmovl-2.ll4
-rw-r--r--test/CodeGen/X86/mmx-vzmovl.ll4
-rw-r--r--test/CodeGen/X86/movfs.ll2
-rw-r--r--test/CodeGen/X86/movgs.ll2
-rw-r--r--test/CodeGen/X86/mul-legalize.ll2
-rw-r--r--test/CodeGen/X86/mul-remat.ll2
-rw-r--r--test/CodeGen/X86/mul-shift-reassoc.ll4
-rw-r--r--test/CodeGen/X86/mul128.ll2
-rw-r--r--test/CodeGen/X86/mul64.ll2
-rw-r--r--test/CodeGen/X86/multiple-return-values-cross-block.ll2
-rw-r--r--test/CodeGen/X86/multiple-return-values.ll2
-rw-r--r--test/CodeGen/X86/nancvt.ll2
-rw-r--r--test/CodeGen/X86/narrow_op-1.ll8
-rw-r--r--test/CodeGen/X86/narrow_op-2.ll8
-rw-r--r--test/CodeGen/X86/neg_fp.ll4
-rw-r--r--test/CodeGen/X86/negate-add-zero.ll2
-rw-r--r--test/CodeGen/X86/negative-sin.ll4
-rw-r--r--test/CodeGen/X86/negative-subscript.ll2
-rw-r--r--test/CodeGen/X86/negative_zero.ll2
-rw-r--r--test/CodeGen/X86/nobt.ll2
-rw-r--r--test/CodeGen/X86/nofence.ll2
-rw-r--r--test/CodeGen/X86/omit-label.ll38
-rw-r--r--test/CodeGen/X86/opt-ext-uses.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-0.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-1.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-2.ll2
-rw-r--r--test/CodeGen/X86/or-branch.ll2
-rw-r--r--test/CodeGen/X86/overlap-shift.ll2
-rw-r--r--test/CodeGen/X86/packed_struct.ll6
-rw-r--r--test/CodeGen/X86/peep-test-0.ll2
-rw-r--r--test/CodeGen/X86/peep-test-1.ll2
-rw-r--r--test/CodeGen/X86/peep-test-2.ll2
-rw-r--r--test/CodeGen/X86/peep-test-3.ll89
-rw-r--r--test/CodeGen/X86/peep-vector-extract-concat.ll2
-rw-r--r--test/CodeGen/X86/peep-vector-extract-insert.ll2
-rw-r--r--test/CodeGen/X86/personality.ll50
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce.ll2
-rw-r--r--test/CodeGen/X86/pic-load-remat.ll2
-rw-r--r--test/CodeGen/X86/pic.ll208
-rw-r--r--test/CodeGen/X86/pic_jumptable.ll8
-rw-r--r--test/CodeGen/X86/pmul.ll2
-rw-r--r--test/CodeGen/X86/postalloc-coalescing.ll2
-rw-r--r--test/CodeGen/X86/pr1462.ll2
-rw-r--r--test/CodeGen/X86/pr1489.ll14
-rw-r--r--test/CodeGen/X86/pr1505.ll2
-rw-r--r--test/CodeGen/X86/pr1505b.ll4
-rw-r--r--test/CodeGen/X86/pr2177.ll2
-rw-r--r--test/CodeGen/X86/pr2182.ll2
-rw-r--r--test/CodeGen/X86/pr2326.ll2
-rw-r--r--test/CodeGen/X86/pr2623.ll2
-rw-r--r--test/CodeGen/X86/pr2656.ll2
-rw-r--r--test/CodeGen/X86/pr2659.ll2
-rw-r--r--test/CodeGen/X86/pr2849.ll2
-rw-r--r--test/CodeGen/X86/pr2924.ll2
-rw-r--r--test/CodeGen/X86/pr2982.ll2
-rw-r--r--test/CodeGen/X86/pr3154.ll4
-rw-r--r--test/CodeGen/X86/pr3216.ll2
-rw-r--r--test/CodeGen/X86/pr3241.ll2
-rw-r--r--test/CodeGen/X86/pr3243.ll2
-rw-r--r--test/CodeGen/X86/pr3244.ll2
-rw-r--r--test/CodeGen/X86/pr3250.ll2
-rw-r--r--test/CodeGen/X86/pr3317.ll2
-rw-r--r--test/CodeGen/X86/pr3366.ll2
-rw-r--r--test/CodeGen/X86/pr3457.ll2
-rw-r--r--test/CodeGen/X86/pr3495-2.ll2
-rw-r--r--test/CodeGen/X86/pr3495.ll6
-rw-r--r--test/CodeGen/X86/pr3522.ll2
-rw-r--r--test/CodeGen/X86/pre-split1.ll2
-rw-r--r--test/CodeGen/X86/pre-split10.ll2
-rw-r--r--test/CodeGen/X86/pre-split11.ll34
-rw-r--r--test/CodeGen/X86/pre-split2.ll2
-rw-r--r--test/CodeGen/X86/pre-split3.ll2
-rw-r--r--test/CodeGen/X86/pre-split4.ll2
-rw-r--r--test/CodeGen/X86/pre-split5.ll2
-rw-r--r--test/CodeGen/X86/pre-split6.ll2
-rw-r--r--test/CodeGen/X86/pre-split7.ll2
-rw-r--r--test/CodeGen/X86/pre-split8.ll2
-rw-r--r--test/CodeGen/X86/pre-split9.ll2
-rw-r--r--test/CodeGen/X86/prefetch.ll2
-rw-r--r--test/CodeGen/X86/private-2.ll2
-rw-r--r--test/CodeGen/X86/private.ll8
-rw-r--r--test/CodeGen/X86/ptrtoint-constexpr.ll8
-rw-r--r--test/CodeGen/X86/rdtsc.ll4
-rw-r--r--test/CodeGen/X86/red-zone.ll26
-rw-r--r--test/CodeGen/X86/red-zone2.ll2
-rw-r--r--test/CodeGen/X86/regpressure.ll2
-rw-r--r--test/CodeGen/X86/rem-2.ll2
-rw-r--r--test/CodeGen/X86/rem.ll2
-rw-r--r--test/CodeGen/X86/remat-constant.ll2
-rw-r--r--test/CodeGen/X86/remat-mov-1.ll2
-rw-r--r--test/CodeGen/X86/remat-scalar-zero.ll95
-rw-r--r--test/CodeGen/X86/ret-addr.ll4
-rw-r--r--test/CodeGen/X86/ret-i64-0.ll2
-rw-r--r--test/CodeGen/X86/ret-mmx.ll2
-rw-r--r--test/CodeGen/X86/rip-rel-address.ll9
-rw-r--r--test/CodeGen/X86/rodata-relocs.ll14
-rw-r--r--test/CodeGen/X86/rot16.ll2
-rw-r--r--test/CodeGen/X86/rot32.ll2
-rw-r--r--test/CodeGen/X86/rot64.ll2
-rw-r--r--test/CodeGen/X86/rotate.ll2
-rw-r--r--test/CodeGen/X86/rotate2.ll2
-rw-r--r--test/CodeGen/X86/scalar-extract.ll2
-rw-r--r--test/CodeGen/X86/scalar-min-max-fill-operand.ll12
-rw-r--r--test/CodeGen/X86/scalar_sse_minmax.ll4
-rw-r--r--test/CodeGen/X86/scalarize-bitcast.ll2
-rw-r--r--test/CodeGen/X86/scev-interchange.ll52
-rw-r--r--test/CodeGen/X86/select-zero-one.ll6
-rw-r--r--test/CodeGen/X86/select.ll6
-rw-r--r--test/CodeGen/X86/setoeq.ll4
-rw-r--r--test/CodeGen/X86/setuge.ll2
-rw-r--r--test/CodeGen/X86/sext-load.ll2
-rw-r--r--test/CodeGen/X86/sext-ret-val.ll2
-rw-r--r--test/CodeGen/X86/sext-select.ll2
-rw-r--r--test/CodeGen/X86/sext-trunc.ll2
-rw-r--r--test/CodeGen/X86/sfence.ll2
-rw-r--r--test/CodeGen/X86/shift-and.ll4
-rw-r--r--test/CodeGen/X86/shift-coalesce.ll4
-rw-r--r--test/CodeGen/X86/shift-codegen.ll2
-rw-r--r--test/CodeGen/X86/shift-combine.ll2
-rw-r--r--test/CodeGen/X86/shift-double.ll2
-rw-r--r--test/CodeGen/X86/shift-folding.ll2
-rw-r--r--test/CodeGen/X86/shift-i128.ll4
-rw-r--r--test/CodeGen/X86/shift-i256.ll4
-rw-r--r--test/CodeGen/X86/shift-one.ll2
-rw-r--r--test/CodeGen/X86/shift-parts.ll22
-rw-r--r--test/CodeGen/X86/shl_elim.ll6
-rw-r--r--test/CodeGen/X86/shrink-fp-const1.ll2
-rw-r--r--test/CodeGen/X86/shrink-fp-const2.ll2
-rw-r--r--test/CodeGen/X86/sincos.ll30
-rw-r--r--test/CodeGen/X86/sink-hoist.ll43
-rw-r--r--test/CodeGen/X86/small-byval-memcpy.ll2
-rw-r--r--test/CodeGen/X86/smul-with-overflow-2.ll4
-rw-r--r--test/CodeGen/X86/smul-with-overflow-3.ll2
-rw-r--r--test/CodeGen/X86/smul-with-overflow.ll2
-rw-r--r--test/CodeGen/X86/soft-fp.ll4
-rw-r--r--test/CodeGen/X86/split-eh-lpad-edges.ll2
-rw-r--r--test/CodeGen/X86/split-select.ll2
-rw-r--r--test/CodeGen/X86/split-vector-rem.ll4
-rw-r--r--test/CodeGen/X86/sret.ll2
-rw-r--r--test/CodeGen/X86/sse-align-0.ll2
-rw-r--r--test/CodeGen/X86/sse-align-1.ll2
-rw-r--r--test/CodeGen/X86/sse-align-10.ll2
-rw-r--r--test/CodeGen/X86/sse-align-11.ll4
-rw-r--r--test/CodeGen/X86/sse-align-12.ll2
-rw-r--r--test/CodeGen/X86/sse-align-2.ll2
-rw-r--r--test/CodeGen/X86/sse-align-3.ll2
-rw-r--r--test/CodeGen/X86/sse-align-4.ll2
-rw-r--r--test/CodeGen/X86/sse-align-5.ll2
-rw-r--r--test/CodeGen/X86/sse-align-6.ll2
-rw-r--r--test/CodeGen/X86/sse-align-7.ll2
-rw-r--r--test/CodeGen/X86/sse-align-8.ll2
-rw-r--r--test/CodeGen/X86/sse-align-9.ll2
-rw-r--r--test/CodeGen/X86/sse-fcopysign.ll2
-rw-r--r--test/CodeGen/X86/sse-load-ret.ll6
-rw-r--r--test/CodeGen/X86/sse-minmax.ll392
-rw-r--r--test/CodeGen/X86/sse-varargs.ll2
-rw-r--r--test/CodeGen/X86/sse2.ll34
-rw-r--r--test/CodeGen/X86/sse3.ll273
-rw-r--r--test/CodeGen/X86/sse41.ll226
-rw-r--r--test/CodeGen/X86/sse42.ll38
-rw-r--r--test/CodeGen/X86/sse_reload_fold.ll3
-rw-r--r--test/CodeGen/X86/stack-align.ll2
-rw-r--r--test/CodeGen/X86/stack-color-with-reg-2.ll2
-rw-r--r--test/CodeGen/X86/stack-color-with-reg.ll6
-rw-r--r--test/CodeGen/X86/stdarg.ll20
-rw-r--r--test/CodeGen/X86/store-empty-member.ll14
-rw-r--r--test/CodeGen/X86/store-fp-constant.ll4
-rw-r--r--test/CodeGen/X86/store-global-address.ll2
-rw-r--r--test/CodeGen/X86/store_op_load_fold.ll2
-rw-r--r--test/CodeGen/X86/store_op_load_fold2.ll2
-rw-r--r--test/CodeGen/X86/storetrunc-fp.ll2
-rw-r--r--test/CodeGen/X86/stride-nine-with-base-reg.ll4
-rw-r--r--test/CodeGen/X86/stride-reuse.ll4
-rw-r--r--test/CodeGen/X86/sub-with-overflow.ll4
-rw-r--r--test/CodeGen/X86/subreg-to-reg-0.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-1.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-2.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-3.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-4.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-5.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-6.ll2
-rw-r--r--test/CodeGen/X86/switch-zextload.ll2
-rw-r--r--test/CodeGen/X86/swizzle.ll6
-rw-r--r--test/CodeGen/X86/tailcall-i1.ll2
-rw-r--r--test/CodeGen/X86/tailcall-stackalign.ll2
-rw-r--r--test/CodeGen/X86/tailcall-structret.ll2
-rw-r--r--test/CodeGen/X86/tailcall-void.ll2
-rw-r--r--test/CodeGen/X86/tailcall1.ll2
-rw-r--r--test/CodeGen/X86/tailcallbyval.ll4
-rw-r--r--test/CodeGen/X86/tailcallbyval64.ll8
-rw-r--r--test/CodeGen/X86/tailcallfp.ll2
-rw-r--r--test/CodeGen/X86/tailcallfp2.ll2
-rw-r--r--test/CodeGen/X86/tailcallpic1.ll2
-rw-r--r--test/CodeGen/X86/tailcallpic2.ll2
-rw-r--r--test/CodeGen/X86/tailcallstack64.ll19
-rw-r--r--test/CodeGen/X86/test-nofold.ll2
-rw-r--r--test/CodeGen/X86/test-shrink-bug.ll23
-rw-r--r--test/CodeGen/X86/test-shrink.ll158
-rw-r--r--test/CodeGen/X86/testl-commute.ll2
-rw-r--r--test/CodeGen/X86/tls-pic.ll67
-rw-r--r--test/CodeGen/X86/tls1.ll4
-rw-r--r--test/CodeGen/X86/tls10.ll4
-rw-r--r--test/CodeGen/X86/tls11.ll4
-rw-r--r--test/CodeGen/X86/tls12.ll4
-rw-r--r--test/CodeGen/X86/tls13.ll4
-rw-r--r--test/CodeGen/X86/tls14.ll4
-rw-r--r--test/CodeGen/X86/tls15.ll4
-rw-r--r--test/CodeGen/X86/tls2.ll4
-rw-r--r--test/CodeGen/X86/tls3.ll4
-rw-r--r--test/CodeGen/X86/tls4.ll4
-rw-r--r--test/CodeGen/X86/tls5.ll4
-rw-r--r--test/CodeGen/X86/tls6.ll4
-rw-r--r--test/CodeGen/X86/tls7.ll4
-rw-r--r--test/CodeGen/X86/tls8.ll4
-rw-r--r--test/CodeGen/X86/tls9.ll4
-rw-r--r--test/CodeGen/X86/trap.ll2
-rw-r--r--test/CodeGen/X86/trunc-to-bool.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-delete.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-pass-sink.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-remat.ll2
-rw-r--r--test/CodeGen/X86/uint_to_fp-2.ll2
-rw-r--r--test/CodeGen/X86/uint_to_fp.ll4
-rw-r--r--test/CodeGen/X86/umul-with-carry.ll2
-rw-r--r--test/CodeGen/X86/umul-with-overflow.ll2
-rw-r--r--test/CodeGen/X86/urem-i8-constant.ll2
-rw-r--r--test/CodeGen/X86/v4f32-immediate.ll2
-rw-r--r--test/CodeGen/X86/variable-sized-darwin-bzero.ll2
-rw-r--r--test/CodeGen/X86/variadic-node-pic.ll2
-rw-r--r--test/CodeGen/X86/vec_add.ll2
-rw-r--r--test/CodeGen/X86/vec_align.ll2
-rw-r--r--test/CodeGen/X86/vec_call.ll4
-rw-r--r--test/CodeGen/X86/vec_clear.ll2
-rw-r--r--test/CodeGen/X86/vec_compare.ll43
-rw-r--r--test/CodeGen/X86/vec_ctbits.ll2
-rw-r--r--test/CodeGen/X86/vec_extract-sse4.ll2
-rw-r--r--test/CodeGen/X86/vec_extract.ll2
-rw-r--r--test/CodeGen/X86/vec_fneg.ll2
-rw-r--r--test/CodeGen/X86/vec_i64.ll2
-rw-r--r--test/CodeGen/X86/vec_ins_extract-1.ll2
-rw-r--r--test/CodeGen/X86/vec_ins_extract.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-2.ll10
-rw-r--r--test/CodeGen/X86/vec_insert-3.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-5.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-6.ll4
-rw-r--r--test/CodeGen/X86/vec_insert-7.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-8.ll2
-rw-r--r--test/CodeGen/X86/vec_insert.ll4
-rw-r--r--test/CodeGen/X86/vec_insert_4.ll2
-rw-r--r--test/CodeGen/X86/vec_loadsingles.ll2
-rw-r--r--test/CodeGen/X86/vec_logical.ll2
-rw-r--r--test/CodeGen/X86/vec_return.ll2
-rw-r--r--test/CodeGen/X86/vec_select.ll2
-rw-r--r--test/CodeGen/X86/vec_set-2.ll4
-rw-r--r--test/CodeGen/X86/vec_set-3.ll2
-rw-r--r--test/CodeGen/X86/vec_set-4.ll2
-rw-r--r--test/CodeGen/X86/vec_set-5.ll2
-rw-r--r--test/CodeGen/X86/vec_set-6.ll2
-rw-r--r--test/CodeGen/X86/vec_set-7.ll2
-rw-r--r--test/CodeGen/X86/vec_set-8.ll4
-rw-r--r--test/CodeGen/X86/vec_set-9.ll4
-rw-r--r--test/CodeGen/X86/vec_set-A.ll2
-rw-r--r--test/CodeGen/X86/vec_set-B.ll4
-rw-r--r--test/CodeGen/X86/vec_set-C.ll6
-rw-r--r--test/CodeGen/X86/vec_set-D.ll2
-rw-r--r--test/CodeGen/X86/vec_set-E.ll2
-rw-r--r--test/CodeGen/X86/vec_set-F.ll6
-rw-r--r--test/CodeGen/X86/vec_set-G.ll2
-rw-r--r--test/CodeGen/X86/vec_set-H.ll2
-rw-r--r--test/CodeGen/X86/vec_set-I.ll4
-rw-r--r--test/CodeGen/X86/vec_set-J.ll2
-rw-r--r--test/CodeGen/X86/vec_set.ll2
-rw-r--r--test/CodeGen/X86/vec_shift.ll6
-rw-r--r--test/CodeGen/X86/vec_shift2.ll2
-rw-r--r--test/CodeGen/X86/vec_shift3.ll6
-rw-r--r--test/CodeGen/X86/vec_shuffle-10.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-11.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-14.ll10
-rw-r--r--test/CodeGen/X86/vec_shuffle-15.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-16.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-17.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-18.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-19.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-20.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-22.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-23.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-24.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-25.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-26.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-27.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-28.ll6
-rw-r--r--test/CodeGen/X86/vec_shuffle-3.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-30.ll8
-rw-r--r--test/CodeGen/X86/vec_shuffle-31.ll7
-rw-r--r--test/CodeGen/X86/vec_shuffle-34.ll8
-rw-r--r--test/CodeGen/X86/vec_shuffle-35.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-36.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-4.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-5.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-6.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-7.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-8.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-9.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle.ll2
-rw-r--r--test/CodeGen/X86/vec_splat-2.ll2
-rw-r--r--test/CodeGen/X86/vec_splat-3.ll2
-rw-r--r--test/CodeGen/X86/vec_splat-4.ll2
-rw-r--r--test/CodeGen/X86/vec_splat.ll4
-rw-r--r--test/CodeGen/X86/vec_ss_load_fold.ll2
-rw-r--r--test/CodeGen/X86/vec_zero-2.ll2
-rw-r--r--test/CodeGen/X86/vec_zero.ll2
-rw-r--r--test/CodeGen/X86/vec_zero_cse.ll6
-rw-r--r--test/CodeGen/X86/vector-intrinsics.ll2
-rw-r--r--test/CodeGen/X86/vector-rem.ll4
-rw-r--r--test/CodeGen/X86/vector-variable-idx.ll2
-rw-r--r--test/CodeGen/X86/vector.ll4
-rw-r--r--test/CodeGen/X86/vfcmp.ll8
-rw-r--r--test/CodeGen/X86/volatile.ll4
-rw-r--r--test/CodeGen/X86/vortex-bug.ll2
-rw-r--r--test/CodeGen/X86/vshift-1.ll22
-rw-r--r--test/CodeGen/X86/vshift-2.ll24
-rw-r--r--test/CodeGen/X86/vshift-3.ll21
-rw-r--r--test/CodeGen/X86/vshift-4.ll22
-rw-r--r--test/CodeGen/X86/vshift-5.ll56
-rw-r--r--test/CodeGen/X86/vshift_scalar.ll2
-rw-r--r--test/CodeGen/X86/vshift_split.ll4
-rw-r--r--test/CodeGen/X86/vshift_split2.ll2
-rw-r--r--test/CodeGen/X86/weak.ll2
-rw-r--r--test/CodeGen/X86/wide-integer-fold.ll12
-rw-r--r--test/CodeGen/X86/widen_arith-1.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-2.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-3.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-4.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-5.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-6.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-1.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-2.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-3.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-4.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-5.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-6.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-1.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-2.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-3.ll2
-rw-r--r--test/CodeGen/X86/widen_conv-4.ll2
-rw-r--r--test/CodeGen/X86/widen_load-0.ll21
-rw-r--r--test/CodeGen/X86/widen_load-1.ll45
-rw-r--r--test/CodeGen/X86/widen_select-1.ll2
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll2
-rw-r--r--test/CodeGen/X86/widen_shuffle-2.ll2
-rw-r--r--test/CodeGen/X86/x86-64-and-mask.ll2
-rw-r--r--test/CodeGen/X86/x86-64-arg.ll2
-rw-r--r--test/CodeGen/X86/x86-64-asm.ll2
-rw-r--r--test/CodeGen/X86/x86-64-dead-stack-adjust.ll4
-rw-r--r--test/CodeGen/X86/x86-64-disp.ll2
-rw-r--r--test/CodeGen/X86/x86-64-frameaddr.ll2
-rw-r--r--test/CodeGen/X86/x86-64-gv-offset.ll2
-rw-r--r--test/CodeGen/X86/x86-64-malloc.ll2
-rw-r--r--test/CodeGen/X86/x86-64-mem.ll5
-rw-r--r--test/CodeGen/X86/x86-64-pic-1.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-10.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-11.ll5
-rw-r--r--test/CodeGen/X86/x86-64-pic-2.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-3.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-4.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-5.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-6.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-7.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-8.ll3
-rw-r--r--test/CodeGen/X86/x86-64-pic-9.ll3
-rw-r--r--test/CodeGen/X86/x86-64-ret0.ll2
-rw-r--r--test/CodeGen/X86/x86-64-shortint.ll2
-rw-r--r--test/CodeGen/X86/x86-64-sret-return.ll11
-rw-r--r--test/CodeGen/X86/x86-64-varargs.ll2
-rw-r--r--test/CodeGen/X86/x86-frameaddr.ll2
-rw-r--r--test/CodeGen/X86/x86-frameaddr2.ll2
-rw-r--r--test/CodeGen/X86/x86-store-gv-addr.ll4
-rw-r--r--test/CodeGen/X86/xmm-r64.ll2
-rw-r--r--test/CodeGen/X86/xor.ll133
-rw-r--r--test/CodeGen/X86/zero-remat.ll32
-rw-r--r--test/CodeGen/X86/zext-inreg-0.ll4
-rw-r--r--test/CodeGen/X86/zext-inreg-1.ll2
-rw-r--r--test/CodeGen/XCore/2008-11-17-Shl64.ll2
-rw-r--r--test/CodeGen/XCore/2009-01-08-Crash.ll2
-rw-r--r--test/CodeGen/XCore/2009-01-14-Remat-Crash.ll2
-rw-r--r--test/CodeGen/XCore/2009-03-27-v2f64-param.ll2
-rw-r--r--test/CodeGen/XCore/2009-07-15-store192.ll7
-rw-r--r--test/CodeGen/XCore/addsub64.ll2
-rw-r--r--test/CodeGen/XCore/ashr.ll76
-rw-r--r--test/CodeGen/XCore/basictest.ll2
-rw-r--r--test/CodeGen/XCore/bitrev.ll2
-rw-r--r--test/CodeGen/XCore/constants.ll11
-rw-r--r--test/CodeGen/XCore/cos.ll2
-rw-r--r--test/CodeGen/XCore/exp.ll2
-rw-r--r--test/CodeGen/XCore/exp2.ll2
-rw-r--r--test/CodeGen/XCore/fneg.ll3
-rw-r--r--test/CodeGen/XCore/getid.ll2
-rw-r--r--test/CodeGen/XCore/globals.ll92
-rw-r--r--test/CodeGen/XCore/load.ll39
-rw-r--r--test/CodeGen/XCore/log.ll2
-rw-r--r--test/CodeGen/XCore/log10.ll2
-rw-r--r--test/CodeGen/XCore/log2.ll2
-rw-r--r--test/CodeGen/XCore/pow.ll2
-rw-r--r--test/CodeGen/XCore/powi.ll2
-rw-r--r--test/CodeGen/XCore/private.ll2
-rw-r--r--test/CodeGen/XCore/sext.ll32
-rw-r--r--test/CodeGen/XCore/sin.ll2
-rw-r--r--test/CodeGen/XCore/sqrt.ll2
-rw-r--r--test/CodeGen/XCore/store.ll35
-rw-r--r--test/CodeGen/XCore/tls.ll20
-rw-r--r--test/CodeGen/XCore/trap.ll2
-rw-r--r--test/CodeGen/XCore/unaligned_load.ll31
-rw-r--r--test/CodeGen/XCore/unaligned_store.ll18
-rw-r--r--test/CodeGen/XCore/unaligned_store_combine.ll12
2221 files changed, 37626 insertions, 3663 deletions
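
The hunks shown below all make the same mechanical update to the tests' RUN lines: where a test previously assembled the IR with llvm-as and piped the resulting bitcode into llc, llc is now invoked on the .ll file directly. Taking the invocation from the first hunk below as the pattern (nothing here is invented; only the plumbing of the command changes, the flags stay the same):

    old: llvm-as < %s | llc -march=arm -mattr=+v6
    new: llc < %s -march=arm -mattr=+v6

The remaining hunks repeat this substitution with each test's own target triple, attributes, and grep filters.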
diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
index caa9a98..a0235f7 100644
--- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
+++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
%struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] }
@ld = external global %struct.layer_data* ; <%struct.layer_data**> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 6e11b16..81483cb 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
index 7317e62..83b26d3 100644
--- a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
define fastcc i8* @read_sleb128(i8* %p, i32* %val) {
br label %bb
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 07390ad..33f935e 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 | grep r9
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
; | grep 35
diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
index 32daf83..b0953dc 100644
--- a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
+++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
; PR1257
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
index 6d3f640..d741112 100644
--- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR1266
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
index f927ef4..e4635f5 100644
--- a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
; PR1279
%struct.rtx_def = type { i16, i8, i8, %struct.u }
diff --git a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
index 55d2993..ea27676 100644
--- a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
; PR1279
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
index ef5a1ae..f24def3 100644
--- a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-apple-darwin
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin
%struct.H_TBL = type { [17 x i8], [256 x i8], i32 }
%struct.Q_TBL = type { [64 x i16], i32 }
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index e412127..b543c57 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {add.*#0}
+; RUN: llc < %s -march=arm | not grep {add.*#0}
define i32 @foo() {
entry:
diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
index 42f5034..e001cde 100644
--- a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
+++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
; RUN: not grep LPC9
%struct.B = type { i32 }
diff --git a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
index ec70a59..a89e937 100644
--- a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "arm-apple-darwin8"
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index f3f82bc..c73b679 100644
--- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.Connection = type { i32, [10 x i8], i32 }
%struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 }
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
index 11431be..26864f1 100644
--- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
+++ b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep 1_0
+; RUN: llc < %s | not grep 1_0
; This used to create an extra branch to 'entry', LBB1_0.
; ModuleID = 'bug.bc'
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index c3596e7..f2a8ee1 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
; Check that calls to baz and quux are tail-merged.
; PR1628
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 41ab1e5..2758505 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
; Check that calls to baz and quux are tail-merged.
; PR1628
diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
index 58c5f89..b3b0769 100644
--- a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
+++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
define i32 @test3() {
tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
diff --git a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
index 430b368..7b15ded 100644
--- a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
; PR1406
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 4c4a933..061bf5e 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=arm | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works.
; PR1628
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index de32a26..d2eb85d 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep {str.*\\!}
%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
index d21a8f2..030486a 100644
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1424
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
index 3cfcdef..30b72e0 100644
--- a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
+++ b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
; PR1609
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index ec170f8..ff01506 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -regalloc=local
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
; PR1925
%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index b81d575..06bc987 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
; PR1925
%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index ca34275..a604c5c 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | not grep 255
+; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255
define i32 @main(i32 %argc, i8** %argv) {
entry:
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 70f1774..78c6222 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
@accum = external global { double, double } ; <{ double, double }*> [#uses=1]
@.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
index 610f5ea..234c7b6 100644
--- a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
@numBinsY = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 80ccddf..77418be 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
index 3cd757f..33bd4de 100644
--- a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
+++ b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
declare void @foo(i8*, i8*, i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
index 035af08..71aa603 100644
--- a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
+++ b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.BiContextType = type { i16, i8, i32 }
%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
diff --git a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
index e98126b..aa61d86 100644
--- a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
%struct.Decoders = type { i32**, i16***, i16****, i16***, i16**, i8**, i8** }
@decoders = external global %struct.Decoders ; <%struct.Decoders*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
index aa75970..4cb768e 100644
--- a/test/CodeGen/ARM/2008-07-17-Fdiv.ll
+++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define float @f(float %a, float %b) nounwind {
%tmp = fdiv float %a, %b
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index 6ea75eb..83fde07 100644
--- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR2589
define void @main({ i32 }*) {
diff --git a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
index 0a79e86..adb0112 100644
--- a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
+++ b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
diff --git a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
index c601b90..5f9d9ae 100644
--- a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
@"\01LC1" = external constant [288 x i8] ; <[288 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
index b3ea6fc..d3bc3e1 100644
--- a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
define void @gcov_exit() nounwind {
entry:
diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
index 164e964..601a516 100644
--- a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
index 3f17a51..35ca7b4 100644
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 154
%"struct.Adv5::Ekin<3>" = type <{ i8 }>
%"struct.Adv5::X::Energyflux<3>" = type { double }
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index 48e663d..4c0c59c 100644
--- a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
target triple = "arm-apple-darwin9"
%struct.FILE_POS = type { i8, i8, i16, i32 }
diff --git a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
index d7befa0..a48f003 100644
--- a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
+++ b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3610
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
target triple = "arm-elf"
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index bd5b719..bc5e602 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
target triple = "arm-apple-darwin9"
@a = external global double ; <double*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 399ed30..0ec17ae 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin9 -mattr=+vfp2
+; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
; rdar://6653182
%struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
index 0ec6d7d..a1ce384 100644
--- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
%struct.hit_t = type { %struct.v_t, double }
%struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 11c05c6..3526722 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep {swi 107}
define i32 @_swilseek(i32) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index c00b1fb..f6b3d2c 100644
--- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR3795
define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) {
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index c7e343c..99907fc 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index f394847..05d2f26 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
entry:
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index 223fa0f..deb092b 100644
--- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; PR3954
define void @foo(...) nounwind {
diff --git a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
index 2bca6e6..670d204 100644
--- a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
+; RUN: llc < %s -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
; PR4166
%"byte[]" = type { i32, i8* }
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index d03b7ce..75610ff 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
; PR4100
@.str = external constant [30 x i8] ; <[30 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 35d4306..7046fcc 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
%struct.List = type { %struct.List*, i32 }
@Node5 = external constant %struct.List ; <%struct.List*> [#uses=1]
@"\01LC" = external constant [7 x i8] ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
index f942c9f..1e2707f 100644
--- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep swp
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=thumb | FileCheck %s
; PR4091
define void @foo(i32 %i, i32* %p) nounwind {
+;CHECK: swp r2, r0, [r1]
%asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind
ret void
}
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
index 7cd35b9..403e3f6 100644
--- a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
@"\01LC" = external constant [15 x i8] ; <[15 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
index 5eaae7a..98e0023 100644
--- a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6
%struct.anon = type { i16, i16 }
%struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
index 45b4bd4..27888d7 100644
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] } ; type %0
type { i32, %struct.D_Reduction** } ; type %1
diff --git a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
index c715a18..a0f903b 100644
--- a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
%struct.term = type { i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
index cbe2385..b56b6844 100644
--- a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
+; RUN: llc < %s -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
; PR4419
define float @__ieee754_acosf(float %x) nounwind {
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index 5c8d7b0..e068be7 100644
--- a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
%struct.rtunion = type { i64 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 27cad7c..17efe00 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@nn = external global i32 ; <i32*> [#uses=1]
@al_len = external global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index 3a14d67..f520be3 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@no_mat = external global i32 ; <i32*> [#uses=1]
@no_mis = external global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
index f94b59d..eee6ff9 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@JJ = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
index bca7f79..93c92b1 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@r = external global i32 ; <i32*> [#uses=1]
@qr = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
index 0c90592..277283d 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@XX = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
index dfccefc..5c0e5fa 100644
--- a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
+++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
@qr = external global i32 ; <i32*> [#uses=1]
@II = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
new file mode 100644
index 0000000..e1e94b6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define void @test(i8* %x) nounwind {
+entry:
+ call void asm sideeffect "pld\09${0:a}", "r,~{cc}"(i8* %x) nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
new file mode 100644
index 0000000..ee93fde
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -0,0 +1,1323 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+
+ %struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
+ %struct.VEC2 = type { double, double, double }
+ %struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
+ %struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
+@avail_edge = internal global %struct.edge_rec* null ; <%struct.edge_rec**> [#uses=6]
+@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[21 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind {
+entry:
+ %delright = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %delleft = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %0 = icmp eq %struct.VERTEX* %tree, null ; <i1> [#uses=1]
+ br i1 %0, label %bb8, label %bb
+
+bb: ; preds = %entry
+ %1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1]
+ %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %3 = icmp eq %struct.VERTEX* %2, null ; <i1> [#uses=1]
+ br i1 %3, label %bb7, label %bb1.i
+
+bb1.i: ; preds = %bb1.i, %bb
+ %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3]
+ %4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %6 = icmp eq %struct.VERTEX* %5, null ; <i1> [#uses=1]
+ br i1 %6, label %get_low.exit, label %bb1.i
+
+get_low.exit: ; preds = %bb1.i
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
+ %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1]
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
+ %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2]
+ %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1]
+ %15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2]
+ br label %bb.i
+
+bb.i: ; preds = %bb4.i, %get_low.exit
+ %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2]
+ %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3]
+ %17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %18 = load %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; <i32> [#uses=1]
+ %20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %21 = load double* %20, align 4 ; <double> [#uses=3]
+ %22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %23 = load double* %22, align 4 ; <double> [#uses=3]
+ br label %bb2.i
+
+bb1.i1: ; preds = %bb2.i
+ %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32 ; <i32> [#uses=2]
+ %25 = add i32 %24, 48 ; <i32> [#uses=1]
+ %26 = and i32 %25, 63 ; <i32> [#uses=1]
+ %27 = and i32 %24, -64 ; <i32> [#uses=1]
+ %28 = or i32 %26, %27 ; <i32> [#uses=1]
+ %29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %32 = ptrtoint %struct.edge_rec* %31 to i32 ; <i32> [#uses=2]
+ %33 = add i32 %32, 16 ; <i32> [#uses=1]
+ %34 = and i32 %33, 63 ; <i32> [#uses=1]
+ %35 = and i32 %32, -64 ; <i32> [#uses=1]
+ %36 = or i32 %34, %35 ; <i32> [#uses=2]
+ %37 = inttoptr i32 %36 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ br label %bb2.i
+
+bb2.i: ; preds = %bb1.i1, %bb.i
+ %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ] ; <i32> [#uses=1]
+ %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn6.in.i = xor i32 %.pn6.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %39 = load double* %38, align 4 ; <double> [#uses=3]
+ %40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %41 = load double* %40, align 4 ; <double> [#uses=3]
+ %42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %43 = load double* %42, align 4 ; <double> [#uses=1]
+ %44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %45 = load double* %44, align 4 ; <double> [#uses=1]
+ %46 = fsub double %39, %21 ; <double> [#uses=1]
+ %47 = fsub double %45, %23 ; <double> [#uses=1]
+ %48 = fmul double %46, %47 ; <double> [#uses=1]
+ %49 = fsub double %43, %21 ; <double> [#uses=1]
+ %50 = fsub double %41, %23 ; <double> [#uses=1]
+ %51 = fmul double %49, %50 ; <double> [#uses=1]
+ %52 = fsub double %48, %51 ; <double> [#uses=1]
+ %53 = fcmp ogt double %52, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %53, label %bb1.i1, label %bb3.i
+
+bb3.i: ; preds = %bb2.i
+ %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 ; <i32> [#uses=1]
+ %55 = xor i32 %54, 32 ; <i32> [#uses=3]
+ %56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %60 = load double* %59, align 4 ; <double> [#uses=1]
+ %61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %62 = load double* %61, align 4 ; <double> [#uses=1]
+ %63 = fsub double %60, %39 ; <double> [#uses=1]
+ %64 = fsub double %23, %41 ; <double> [#uses=1]
+ %65 = fmul double %63, %64 ; <double> [#uses=1]
+ %66 = fsub double %21, %39 ; <double> [#uses=1]
+ %67 = fsub double %62, %41 ; <double> [#uses=1]
+ %68 = fmul double %66, %67 ; <double> [#uses=1]
+ %69 = fsub double %65, %68 ; <double> [#uses=1]
+ %70 = fcmp ogt double %69, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %70, label %bb4.i, label %bb5.i
+
+bb4.i: ; preds = %bb3.i
+ %71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb.i
+
+bb5.i: ; preds = %bb3.i
+ %73 = add i32 %55, 48 ; <i32> [#uses=1]
+ %74 = and i32 %73, 63 ; <i32> [#uses=1]
+ %75 = and i32 %55, -64 ; <i32> [#uses=1]
+ %76 = or i32 %74, %75 ; <i32> [#uses=1]
+ %77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %80 = ptrtoint %struct.edge_rec* %79 to i32 ; <i32> [#uses=2]
+ %81 = add i32 %80, 16 ; <i32> [#uses=1]
+ %82 = and i32 %81, 63 ; <i32> [#uses=1]
+ %83 = and i32 %80, -64 ; <i32> [#uses=1]
+ %84 = or i32 %82, %83 ; <i32> [#uses=1]
+ %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %88 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6]
+ %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
+ %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2]
+ store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
+ %91 = ptrtoint %struct.edge_rec* %88 to i32 ; <i32> [#uses=5]
+ %92 = add i32 %91, 16 ; <i32> [#uses=2]
+ %93 = inttoptr i32 %92 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %94 = add i32 %91, 48 ; <i32> [#uses=1]
+ %95 = inttoptr i32 %94 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
+ %97 = add i32 %91, 32 ; <i32> [#uses=1]
+ %98 = inttoptr i32 %97 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
+ %100 = getelementptr %struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
+ %101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
+ %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %103 = ptrtoint %struct.edge_rec* %102 to i32 ; <i32> [#uses=2]
+ %104 = add i32 %103, 16 ; <i32> [#uses=1]
+ %105 = and i32 %104, 63 ; <i32> [#uses=1]
+ %106 = and i32 %103, -64 ; <i32> [#uses=1]
+ %107 = or i32 %105, %106 ; <i32> [#uses=1]
+ %108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %111 = ptrtoint %struct.edge_rec* %110 to i32 ; <i32> [#uses=2]
+ %112 = add i32 %111, 16 ; <i32> [#uses=1]
+ %113 = and i32 %112, 63 ; <i32> [#uses=1]
+ %114 = and i32 %111, -64 ; <i32> [#uses=1]
+ %115 = or i32 %113, %114 ; <i32> [#uses=1]
+ %116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
+ store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
+ %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
+ store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
+ %123 = xor i32 %91, 32 ; <i32> [#uses=1]
+ %124 = inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %127 = ptrtoint %struct.edge_rec* %126 to i32 ; <i32> [#uses=2]
+ %128 = add i32 %127, 16 ; <i32> [#uses=1]
+ %129 = and i32 %128, 63 ; <i32> [#uses=1]
+ %130 = and i32 %127, -64 ; <i32> [#uses=1]
+ %131 = or i32 %129, %130 ; <i32> [#uses=1]
+ %132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %135 = ptrtoint %struct.edge_rec* %134 to i32 ; <i32> [#uses=2]
+ %136 = add i32 %135, 16 ; <i32> [#uses=1]
+ %137 = and i32 %136, 63 ; <i32> [#uses=1]
+ %138 = and i32 %135, -64 ; <i32> [#uses=1]
+ %139 = or i32 %137, %138 ; <i32> [#uses=1]
+ %140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
+ store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
+ %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2]
+ store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
+ store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
+ %147 = and i32 %92, 63 ; <i32> [#uses=1]
+ %148 = and i32 %91, -64 ; <i32> [#uses=1]
+ %149 = or i32 %147, %148 ; <i32> [#uses=1]
+ %150 = inttoptr i32 %149 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %153 = ptrtoint %struct.edge_rec* %152 to i32 ; <i32> [#uses=2]
+ %154 = add i32 %153, 16 ; <i32> [#uses=1]
+ %155 = and i32 %154, 63 ; <i32> [#uses=1]
+ %156 = and i32 %153, -64 ; <i32> [#uses=1]
+ %157 = or i32 %155, %156 ; <i32> [#uses=1]
+ %158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %164 = icmp eq %struct.VERTEX* %163, %159 ; <i1> [#uses=1]
+ %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3]
+ %165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %167 = icmp eq %struct.VERTEX* %166, %161 ; <i1> [#uses=1]
+ %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3]
+ br label %bb9.i
+
+bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i
+ %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2]
+ %168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; <i32> [#uses=3]
+ %173 = xor i32 %172, 32 ; <i32> [#uses=1]
+ %174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %177 = ptrtoint %struct.edge_rec* %169 to i32 ; <i32> [#uses=1]
+ %178 = xor i32 %177, 32 ; <i32> [#uses=1]
+ %179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %183 = load double* %182, align 4 ; <double> [#uses=2]
+ %184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %185 = load double* %184, align 4 ; <double> [#uses=2]
+ %186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %187 = load double* %186, align 4 ; <double> [#uses=1]
+ %188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %189 = load double* %188, align 4 ; <double> [#uses=1]
+ %190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %191 = load double* %190, align 4 ; <double> [#uses=2]
+ %192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %193 = load double* %192, align 4 ; <double> [#uses=2]
+ %194 = fsub double %183, %191 ; <double> [#uses=1]
+ %195 = fsub double %189, %193 ; <double> [#uses=1]
+ %196 = fmul double %194, %195 ; <double> [#uses=1]
+ %197 = fsub double %187, %191 ; <double> [#uses=1]
+ %198 = fsub double %185, %193 ; <double> [#uses=1]
+ %199 = fmul double %197, %198 ; <double> [#uses=1]
+ %200 = fsub double %196, %199 ; <double> [#uses=1]
+ %201 = fcmp ogt double %200, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %201, label %bb10.i, label %bb13.i
+
+bb10.i: ; preds = %bb9.i
+ %202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb12.i
+
+bb11.i: ; preds = %bb12.i
+ %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32 ; <i32> [#uses=3]
+ %204 = add i32 %203, 16 ; <i32> [#uses=1]
+ %205 = and i32 %204, 63 ; <i32> [#uses=1]
+ %206 = and i32 %203, -64 ; <i32> [#uses=3]
+ %207 = or i32 %205, %206 ; <i32> [#uses=1]
+ %208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %211 = ptrtoint %struct.edge_rec* %210 to i32 ; <i32> [#uses=2]
+ %212 = add i32 %211, 16 ; <i32> [#uses=1]
+ %213 = and i32 %212, 63 ; <i32> [#uses=1]
+ %214 = and i32 %211, -64 ; <i32> [#uses=1]
+ %215 = or i32 %213, %214 ; <i32> [#uses=1]
+ %216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %219 = ptrtoint %struct.edge_rec* %218 to i32 ; <i32> [#uses=2]
+ %220 = add i32 %219, 16 ; <i32> [#uses=1]
+ %221 = and i32 %220, 63 ; <i32> [#uses=1]
+ %222 = and i32 %219, -64 ; <i32> [#uses=1]
+ %223 = or i32 %221, %222 ; <i32> [#uses=1]
+ %224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %227 = ptrtoint %struct.edge_rec* %226 to i32 ; <i32> [#uses=2]
+ %228 = add i32 %227, 16 ; <i32> [#uses=1]
+ %229 = and i32 %228, 63 ; <i32> [#uses=1]
+ %230 = and i32 %227, -64 ; <i32> [#uses=1]
+ %231 = or i32 %229, %230 ; <i32> [#uses=1]
+ %232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
+ store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
+ %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
+ store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
+ %239 = xor i32 %203, 32 ; <i32> [#uses=2]
+ %240 = add i32 %239, 16 ; <i32> [#uses=1]
+ %241 = and i32 %240, 63 ; <i32> [#uses=1]
+ %242 = or i32 %241, %206 ; <i32> [#uses=1]
+ %243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %246 = ptrtoint %struct.edge_rec* %245 to i32 ; <i32> [#uses=2]
+ %247 = add i32 %246, 16 ; <i32> [#uses=1]
+ %248 = and i32 %247, 63 ; <i32> [#uses=1]
+ %249 = and i32 %246, -64 ; <i32> [#uses=1]
+ %250 = or i32 %248, %249 ; <i32> [#uses=1]
+ %251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %255 = ptrtoint %struct.edge_rec* %254 to i32 ; <i32> [#uses=2]
+ %256 = add i32 %255, 16 ; <i32> [#uses=1]
+ %257 = and i32 %256, 63 ; <i32> [#uses=1]
+ %258 = and i32 %255, -64 ; <i32> [#uses=1]
+ %259 = or i32 %257, %258 ; <i32> [#uses=1]
+ %260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %263 = ptrtoint %struct.edge_rec* %262 to i32 ; <i32> [#uses=2]
+ %264 = add i32 %263, 16 ; <i32> [#uses=1]
+ %265 = and i32 %264, 63 ; <i32> [#uses=1]
+ %266 = and i32 %263, -64 ; <i32> [#uses=1]
+ %267 = or i32 %265, %266 ; <i32> [#uses=1]
+ %268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
+ store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
+ %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
+ store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
+ %275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
+ %277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %.pre.i = load double* %182, align 4 ; <double> [#uses=1]
+ %.pre22.i = load double* %184, align 4 ; <double> [#uses=1]
+ br label %bb12.i
+
+bb12.i: ; preds = %bb11.i, %bb10.i
+ %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] ; <%struct.edge_rec*> [#uses=2]
+ %279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ] ; <double> [#uses=3]
+ %280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ] ; <double> [#uses=3]
+ %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn5.in.i = xor i32 %.pn5.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn4.in.i = xor i32 %.pn4.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %281 = load double* %202, align 4 ; <double> [#uses=3]
+ %282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %283 = load double* %282, align 4 ; <double> [#uses=1]
+ %284 = fsub double %283, %280 ; <double> [#uses=2]
+ %285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %286 = load double* %285, align 4 ; <double> [#uses=1]
+ %287 = fsub double %286, %279 ; <double> [#uses=2]
+ %288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %289 = load double* %288, align 4 ; <double> [#uses=1]
+ %290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %291 = load double* %290, align 4 ; <double> [#uses=1]
+ %292 = fsub double %291, %280 ; <double> [#uses=2]
+ %293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %294 = load double* %293, align 4 ; <double> [#uses=1]
+ %295 = fsub double %294, %279 ; <double> [#uses=2]
+ %296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %297 = load double* %296, align 4 ; <double> [#uses=1]
+ %298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %299 = load double* %298, align 4 ; <double> [#uses=1]
+ %300 = fsub double %299, %280 ; <double> [#uses=2]
+ %301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %302 = load double* %301, align 4 ; <double> [#uses=1]
+ %303 = fsub double %302, %279 ; <double> [#uses=2]
+ %304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %305 = load double* %304, align 4 ; <double> [#uses=1]
+ %306 = fsub double %289, %281 ; <double> [#uses=1]
+ %307 = fmul double %292, %303 ; <double> [#uses=1]
+ %308 = fmul double %295, %300 ; <double> [#uses=1]
+ %309 = fsub double %307, %308 ; <double> [#uses=1]
+ %310 = fmul double %306, %309 ; <double> [#uses=1]
+ %311 = fsub double %297, %281 ; <double> [#uses=1]
+ %312 = fmul double %300, %287 ; <double> [#uses=1]
+ %313 = fmul double %303, %284 ; <double> [#uses=1]
+ %314 = fsub double %312, %313 ; <double> [#uses=1]
+ %315 = fmul double %311, %314 ; <double> [#uses=1]
+ %316 = fadd double %315, %310 ; <double> [#uses=1]
+ %317 = fsub double %305, %281 ; <double> [#uses=1]
+ %318 = fmul double %284, %295 ; <double> [#uses=1]
+ %319 = fmul double %287, %292 ; <double> [#uses=1]
+ %320 = fsub double %318, %319 ; <double> [#uses=1]
+ %321 = fmul double %317, %320 ; <double> [#uses=1]
+ %322 = fadd double %321, %316 ; <double> [#uses=1]
+ %323 = fcmp ogt double %322, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %323, label %bb11.i, label %bb13.loopexit.i
+
+bb13.loopexit.i: ; preds = %bb12.i
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
+ %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb13.i
+
+bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
+ %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] ; <%struct.VERTEX*> [#uses=4]
+ %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] ; <%struct.VERTEX*> [#uses=3]
+ %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] ; <%struct.edge_rec*> [#uses=3]
+ %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32 ; <i32> [#uses=2]
+ %327 = add i32 %326, 16 ; <i32> [#uses=1]
+ %328 = and i32 %327, 63 ; <i32> [#uses=1]
+ %329 = and i32 %326, -64 ; <i32> [#uses=1]
+ %330 = or i32 %328, %329 ; <i32> [#uses=1]
+ %331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %332 = getelementptr %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %334 = ptrtoint %struct.edge_rec* %333 to i32 ; <i32> [#uses=2]
+ %335 = add i32 %334, 16 ; <i32> [#uses=1]
+ %336 = and i32 %335, 63 ; <i32> [#uses=1]
+ %337 = and i32 %334, -64 ; <i32> [#uses=1]
+ %338 = or i32 %336, %337 ; <i32> [#uses=3]
+ %339 = xor i32 %338, 32 ; <i32> [#uses=1]
+ %340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %344 = load double* %343, align 4 ; <double> [#uses=1]
+ %345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %346 = load double* %345, align 4 ; <double> [#uses=1]
+ %347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %348 = load double* %347, align 4 ; <double> [#uses=1]
+ %349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %350 = load double* %349, align 4 ; <double> [#uses=1]
+ %351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %352 = load double* %351, align 4 ; <double> [#uses=3]
+ %353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %354 = load double* %353, align 4 ; <double> [#uses=3]
+ %355 = fsub double %344, %352 ; <double> [#uses=1]
+ %356 = fsub double %350, %354 ; <double> [#uses=1]
+ %357 = fmul double %355, %356 ; <double> [#uses=1]
+ %358 = fsub double %348, %352 ; <double> [#uses=1]
+ %359 = fsub double %346, %354 ; <double> [#uses=1]
+ %360 = fmul double %358, %359 ; <double> [#uses=1]
+ %361 = fsub double %357, %360 ; <double> [#uses=1]
+ %362 = fcmp ogt double %361, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %362, label %bb14.i, label %bb17.i
+
+bb14.i: ; preds = %bb13.i
+ %363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb16.i
+
+bb15.i: ; preds = %bb16.i
+ %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32 ; <i32> [#uses=3]
+ %365 = add i32 %364, 16 ; <i32> [#uses=1]
+ %366 = and i32 %365, 63 ; <i32> [#uses=1]
+ %367 = and i32 %364, -64 ; <i32> [#uses=3]
+ %368 = or i32 %366, %367 ; <i32> [#uses=1]
+ %369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %372 = ptrtoint %struct.edge_rec* %371 to i32 ; <i32> [#uses=2]
+ %373 = add i32 %372, 16 ; <i32> [#uses=1]
+ %374 = and i32 %373, 63 ; <i32> [#uses=1]
+ %375 = and i32 %372, -64 ; <i32> [#uses=1]
+ %376 = or i32 %374, %375 ; <i32> [#uses=1]
+ %377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %380 = ptrtoint %struct.edge_rec* %379 to i32 ; <i32> [#uses=2]
+ %381 = add i32 %380, 16 ; <i32> [#uses=1]
+ %382 = and i32 %381, 63 ; <i32> [#uses=1]
+ %383 = and i32 %380, -64 ; <i32> [#uses=1]
+ %384 = or i32 %382, %383 ; <i32> [#uses=1]
+ %385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %388 = ptrtoint %struct.edge_rec* %387 to i32 ; <i32> [#uses=2]
+ %389 = add i32 %388, 16 ; <i32> [#uses=1]
+ %390 = and i32 %389, 63 ; <i32> [#uses=1]
+ %391 = and i32 %388, -64 ; <i32> [#uses=1]
+ %392 = or i32 %390, %391 ; <i32> [#uses=1]
+ %393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
+ store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
+ %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
+ store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
+ %400 = xor i32 %364, 32 ; <i32> [#uses=2]
+ %401 = add i32 %400, 16 ; <i32> [#uses=1]
+ %402 = and i32 %401, 63 ; <i32> [#uses=1]
+ %403 = or i32 %402, %367 ; <i32> [#uses=1]
+ %404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %407 = ptrtoint %struct.edge_rec* %406 to i32 ; <i32> [#uses=2]
+ %408 = add i32 %407, 16 ; <i32> [#uses=1]
+ %409 = and i32 %408, 63 ; <i32> [#uses=1]
+ %410 = and i32 %407, -64 ; <i32> [#uses=1]
+ %411 = or i32 %409, %410 ; <i32> [#uses=1]
+ %412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %416 = ptrtoint %struct.edge_rec* %415 to i32 ; <i32> [#uses=2]
+ %417 = add i32 %416, 16 ; <i32> [#uses=1]
+ %418 = and i32 %417, 63 ; <i32> [#uses=1]
+ %419 = and i32 %416, -64 ; <i32> [#uses=1]
+ %420 = or i32 %418, %419 ; <i32> [#uses=1]
+ %421 = inttoptr i32 %420 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %424 = ptrtoint %struct.edge_rec* %423 to i32 ; <i32> [#uses=2]
+ %425 = add i32 %424, 16 ; <i32> [#uses=1]
+ %426 = and i32 %425, 63 ; <i32> [#uses=1]
+ %427 = and i32 %424, -64 ; <i32> [#uses=1]
+ %428 = or i32 %426, %427 ; <i32> [#uses=1]
+ %429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
+ store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
+ %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
+ store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
+ %436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
+ %438 = add i32 %t.1.in.i, 16 ; <i32> [#uses=1]
+ %439 = and i32 %438, 63 ; <i32> [#uses=1]
+ %440 = and i32 %t.1.in.i, -64 ; <i32> [#uses=1]
+ %441 = or i32 %439, %440 ; <i32> [#uses=1]
+ %442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %443 = getelementptr %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %445 = ptrtoint %struct.edge_rec* %444 to i32 ; <i32> [#uses=2]
+ %446 = add i32 %445, 16 ; <i32> [#uses=1]
+ %447 = and i32 %446, 63 ; <i32> [#uses=1]
+ %448 = and i32 %445, -64 ; <i32> [#uses=1]
+ %449 = or i32 %447, %448 ; <i32> [#uses=2]
+ %.pre25.i = load double* %351, align 4 ; <double> [#uses=1]
+ %.pre26.i = load double* %353, align 4 ; <double> [#uses=1]
+ br label %bb16.i
+
+bb16.i: ; preds = %bb15.i, %bb14.i
+ %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] ; <%struct.edge_rec*> [#uses=2]
+ %450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ] ; <double> [#uses=3]
+ %451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ] ; <double> [#uses=3]
+ %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=3]
+ %.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=1]
+ %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn3.in.i = xor i32 %.pn3.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn.in.i = xor i32 %.pn.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.1.in.i = getelementptr %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %452 = load double* %363, align 4 ; <double> [#uses=3]
+ %453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %454 = load double* %453, align 4 ; <double> [#uses=1]
+ %455 = fsub double %454, %451 ; <double> [#uses=2]
+ %456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %457 = load double* %456, align 4 ; <double> [#uses=1]
+ %458 = fsub double %457, %450 ; <double> [#uses=2]
+ %459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %460 = load double* %459, align 4 ; <double> [#uses=1]
+ %461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %462 = load double* %461, align 4 ; <double> [#uses=1]
+ %463 = fsub double %462, %451 ; <double> [#uses=2]
+ %464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %465 = load double* %464, align 4 ; <double> [#uses=1]
+ %466 = fsub double %465, %450 ; <double> [#uses=2]
+ %467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %468 = load double* %467, align 4 ; <double> [#uses=1]
+ %469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %470 = load double* %469, align 4 ; <double> [#uses=1]
+ %471 = fsub double %470, %451 ; <double> [#uses=2]
+ %472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %473 = load double* %472, align 4 ; <double> [#uses=1]
+ %474 = fsub double %473, %450 ; <double> [#uses=2]
+ %475 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %476 = load double* %475, align 4 ; <double> [#uses=1]
+ %477 = fsub double %460, %452 ; <double> [#uses=1]
+ %478 = fmul double %463, %474 ; <double> [#uses=1]
+ %479 = fmul double %466, %471 ; <double> [#uses=1]
+ %480 = fsub double %478, %479 ; <double> [#uses=1]
+ %481 = fmul double %477, %480 ; <double> [#uses=1]
+ %482 = fsub double %468, %452 ; <double> [#uses=1]
+ %483 = fmul double %471, %458 ; <double> [#uses=1]
+ %484 = fmul double %474, %455 ; <double> [#uses=1]
+ %485 = fsub double %483, %484 ; <double> [#uses=1]
+ %486 = fmul double %482, %485 ; <double> [#uses=1]
+ %487 = fadd double %486, %481 ; <double> [#uses=1]
+ %488 = fsub double %476, %452 ; <double> [#uses=1]
+ %489 = fmul double %455, %466 ; <double> [#uses=1]
+ %490 = fmul double %458, %463 ; <double> [#uses=1]
+ %491 = fsub double %489, %490 ; <double> [#uses=1]
+ %492 = fmul double %488, %491 ; <double> [#uses=1]
+ %493 = fadd double %492, %487 ; <double> [#uses=1]
+ %494 = fcmp ogt double %493, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %494, label %bb15.i, label %bb17.loopexit.i
+
+bb17.loopexit.i: ; preds = %bb16.i
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
+ %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb17.i
+
+bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
+ %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] ; <%struct.edge_rec*> [#uses=3]
+ %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 ; <i32> [#uses=1]
+ %498 = xor i32 %497, 32 ; <i32> [#uses=1]
+ %499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %503 = load double* %502, align 4 ; <double> [#uses=1]
+ %504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %505 = load double* %504, align 4 ; <double> [#uses=1]
+ %506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %507 = load double* %506, align 4 ; <double> [#uses=2]
+ %508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %509 = load double* %508, align 4 ; <double> [#uses=2]
+ %510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %511 = load double* %510, align 4 ; <double> [#uses=3]
+ %512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %513 = load double* %512, align 4 ; <double> [#uses=3]
+ %514 = fsub double %503, %511 ; <double> [#uses=2]
+ %515 = fsub double %509, %513 ; <double> [#uses=1]
+ %516 = fmul double %514, %515 ; <double> [#uses=1]
+ %517 = fsub double %507, %511 ; <double> [#uses=1]
+ %518 = fsub double %505, %513 ; <double> [#uses=2]
+ %519 = fmul double %517, %518 ; <double> [#uses=1]
+ %520 = fsub double %516, %519 ; <double> [#uses=1]
+ %521 = fcmp ogt double %520, 0.000000e+00 ; <i1> [#uses=2]
+ %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 ; <i32> [#uses=3]
+ %523 = xor i32 %522, 32 ; <i32> [#uses=1]
+ %524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %528 = load double* %527, align 4 ; <double> [#uses=4]
+ %529 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %530 = load double* %529, align 4 ; <double> [#uses=4]
+ %531 = fsub double %530, %513 ; <double> [#uses=1]
+ %532 = fmul double %514, %531 ; <double> [#uses=1]
+ %533 = fsub double %528, %511 ; <double> [#uses=1]
+ %534 = fmul double %533, %518 ; <double> [#uses=1]
+ %535 = fsub double %532, %534 ; <double> [#uses=1]
+ %536 = fcmp ogt double %535, 0.000000e+00 ; <i1> [#uses=2]
+ %537 = or i1 %536, %521 ; <i1> [#uses=1]
+ br i1 %537, label %bb21.i, label %do_merge.exit
+
+bb21.i: ; preds = %bb17.i
+ %538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3]
+ br i1 %521, label %bb22.i, label %bb24.i
+
+bb22.i: ; preds = %bb21.i
+ br i1 %536, label %bb23.i, label %bb25.i
+
+bb23.i: ; preds = %bb22.i
+ %542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %543 = load double* %542, align 4 ; <double> [#uses=3]
+ %544 = fsub double %507, %528 ; <double> [#uses=2]
+ %545 = fsub double %509, %530 ; <double> [#uses=2]
+ %546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %547 = load double* %546, align 4 ; <double> [#uses=1]
+ %548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %549 = load double* %548, align 4 ; <double> [#uses=1]
+ %550 = fsub double %549, %528 ; <double> [#uses=2]
+ %551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %552 = load double* %551, align 4 ; <double> [#uses=1]
+ %553 = fsub double %552, %530 ; <double> [#uses=2]
+ %554 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %555 = load double* %554, align 4 ; <double> [#uses=1]
+ %556 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %557 = load double* %556, align 4 ; <double> [#uses=1]
+ %558 = fsub double %557, %528 ; <double> [#uses=2]
+ %559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %560 = load double* %559, align 4 ; <double> [#uses=1]
+ %561 = fsub double %560, %530 ; <double> [#uses=2]
+ %562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %563 = load double* %562, align 4 ; <double> [#uses=1]
+ %564 = fsub double %547, %543 ; <double> [#uses=1]
+ %565 = fmul double %550, %561 ; <double> [#uses=1]
+ %566 = fmul double %553, %558 ; <double> [#uses=1]
+ %567 = fsub double %565, %566 ; <double> [#uses=1]
+ %568 = fmul double %564, %567 ; <double> [#uses=1]
+ %569 = fsub double %555, %543 ; <double> [#uses=1]
+ %570 = fmul double %558, %545 ; <double> [#uses=1]
+ %571 = fmul double %561, %544 ; <double> [#uses=1]
+ %572 = fsub double %570, %571 ; <double> [#uses=1]
+ %573 = fmul double %569, %572 ; <double> [#uses=1]
+ %574 = fadd double %573, %568 ; <double> [#uses=1]
+ %575 = fsub double %563, %543 ; <double> [#uses=1]
+ %576 = fmul double %544, %553 ; <double> [#uses=1]
+ %577 = fmul double %545, %550 ; <double> [#uses=1]
+ %578 = fsub double %576, %577 ; <double> [#uses=1]
+ %579 = fmul double %575, %578 ; <double> [#uses=1]
+ %580 = fadd double %579, %574 ; <double> [#uses=1]
+ %581 = fcmp ogt double %580, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %581, label %bb24.i, label %bb25.i
+
+bb24.i: ; preds = %bb23.i, %bb21.i
+ %582 = add i32 %522, 48 ; <i32> [#uses=1]
+ %583 = and i32 %582, 63 ; <i32> [#uses=1]
+ %584 = and i32 %522, -64 ; <i32> [#uses=1]
+ %585 = or i32 %583, %584 ; <i32> [#uses=1]
+ %586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %589 = ptrtoint %struct.edge_rec* %588 to i32 ; <i32> [#uses=2]
+ %590 = add i32 %589, 16 ; <i32> [#uses=1]
+ %591 = and i32 %590, 63 ; <i32> [#uses=1]
+ %592 = and i32 %589, -64 ; <i32> [#uses=1]
+ %593 = or i32 %591, %592 ; <i32> [#uses=1]
+ %594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %595 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
+ %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4
+ %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4
+ %598 = ptrtoint %struct.edge_rec* %595 to i32 ; <i32> [#uses=5]
+ %599 = add i32 %598, 16 ; <i32> [#uses=1]
+ %600 = inttoptr i32 %599 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %601 = add i32 %598, 48 ; <i32> [#uses=1]
+ %602 = inttoptr i32 %601 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4
+ %604 = add i32 %598, 32 ; <i32> [#uses=1]
+ %605 = inttoptr i32 %604 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4
+ %607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
+ %608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
+ %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %610 = ptrtoint %struct.edge_rec* %609 to i32 ; <i32> [#uses=2]
+ %611 = add i32 %610, 16 ; <i32> [#uses=1]
+ %612 = and i32 %611, 63 ; <i32> [#uses=1]
+ %613 = and i32 %610, -64 ; <i32> [#uses=1]
+ %614 = or i32 %612, %613 ; <i32> [#uses=1]
+ %615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %618 = ptrtoint %struct.edge_rec* %617 to i32 ; <i32> [#uses=2]
+ %619 = add i32 %618, 16 ; <i32> [#uses=1]
+ %620 = and i32 %619, 63 ; <i32> [#uses=1]
+ %621 = and i32 %618, -64 ; <i32> [#uses=1]
+ %622 = or i32 %620, %621 ; <i32> [#uses=1]
+ %623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
+ store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
+ %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
+ store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
+ %630 = xor i32 %598, 32 ; <i32> [#uses=2]
+ %631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %634 = ptrtoint %struct.edge_rec* %633 to i32 ; <i32> [#uses=2]
+ %635 = add i32 %634, 16 ; <i32> [#uses=1]
+ %636 = and i32 %635, 63 ; <i32> [#uses=1]
+ %637 = and i32 %634, -64 ; <i32> [#uses=1]
+ %638 = or i32 %636, %637 ; <i32> [#uses=1]
+ %639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %642 = ptrtoint %struct.edge_rec* %641 to i32 ; <i32> [#uses=2]
+ %643 = add i32 %642, 16 ; <i32> [#uses=1]
+ %644 = and i32 %643, 63 ; <i32> [#uses=1]
+ %645 = and i32 %642, -64 ; <i32> [#uses=1]
+ %646 = or i32 %644, %645 ; <i32> [#uses=1]
+ %647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %649 = load %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
+ store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
+ %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
+ store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
+ %654 = add i32 %630, 48 ; <i32> [#uses=1]
+ %655 = and i32 %654, 63 ; <i32> [#uses=1]
+ %656 = and i32 %598, -64 ; <i32> [#uses=1]
+ %657 = or i32 %655, %656 ; <i32> [#uses=1]
+ %658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %661 = ptrtoint %struct.edge_rec* %660 to i32 ; <i32> [#uses=2]
+ %662 = add i32 %661, 16 ; <i32> [#uses=1]
+ %663 = and i32 %662, 63 ; <i32> [#uses=1]
+ %664 = and i32 %661, -64 ; <i32> [#uses=1]
+ %665 = or i32 %663, %664 ; <i32> [#uses=1]
+ %666 = inttoptr i32 %665 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ br label %bb9.i
+
+bb25.i: ; preds = %bb23.i, %bb22.i
+ %667 = add i32 %172, 16 ; <i32> [#uses=1]
+ %668 = and i32 %667, 63 ; <i32> [#uses=1]
+ %669 = and i32 %172, -64 ; <i32> [#uses=1]
+ %670 = or i32 %668, %669 ; <i32> [#uses=1]
+ %671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %674 = ptrtoint %struct.edge_rec* %673 to i32 ; <i32> [#uses=2]
+ %675 = add i32 %674, 16 ; <i32> [#uses=1]
+ %676 = and i32 %675, 63 ; <i32> [#uses=1]
+ %677 = and i32 %674, -64 ; <i32> [#uses=1]
+ %678 = or i32 %676, %677 ; <i32> [#uses=1]
+ %679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %680 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5]
+ store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4
+ %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4
+ %683 = ptrtoint %struct.edge_rec* %680 to i32 ; <i32> [#uses=4]
+ %684 = add i32 %683, 16 ; <i32> [#uses=1]
+ %685 = inttoptr i32 %684 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %686 = add i32 %683, 48 ; <i32> [#uses=1]
+ %687 = inttoptr i32 %686 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4
+ %689 = add i32 %683, 32 ; <i32> [#uses=1]
+ %690 = inttoptr i32 %689 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4
+ %692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
+ %693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
+ %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %695 = ptrtoint %struct.edge_rec* %694 to i32 ; <i32> [#uses=2]
+ %696 = add i32 %695, 16 ; <i32> [#uses=1]
+ %697 = and i32 %696, 63 ; <i32> [#uses=1]
+ %698 = and i32 %695, -64 ; <i32> [#uses=1]
+ %699 = or i32 %697, %698 ; <i32> [#uses=1]
+ %700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %703 = ptrtoint %struct.edge_rec* %702 to i32 ; <i32> [#uses=2]
+ %704 = add i32 %703, 16 ; <i32> [#uses=1]
+ %705 = and i32 %704, 63 ; <i32> [#uses=1]
+ %706 = and i32 %703, -64 ; <i32> [#uses=1]
+ %707 = or i32 %705, %706 ; <i32> [#uses=1]
+ %708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
+ store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
+ %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
+ store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
+ %715 = xor i32 %683, 32 ; <i32> [#uses=1]
+ %716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %719 = ptrtoint %struct.edge_rec* %718 to i32 ; <i32> [#uses=2]
+ %720 = add i32 %719, 16 ; <i32> [#uses=1]
+ %721 = and i32 %720, 63 ; <i32> [#uses=1]
+ %722 = and i32 %719, -64 ; <i32> [#uses=1]
+ %723 = or i32 %721, %722 ; <i32> [#uses=1]
+ %724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %727 = ptrtoint %struct.edge_rec* %726 to i32 ; <i32> [#uses=2]
+ %728 = add i32 %727, 16 ; <i32> [#uses=1]
+ %729 = and i32 %728, 63 ; <i32> [#uses=1]
+ %730 = and i32 %727, -64 ; <i32> [#uses=1]
+ %731 = or i32 %729, %730 ; <i32> [#uses=1]
+ %732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
+ store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
+ %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
+ store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
+ %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb9.i
+
+do_merge.exit: ; preds = %bb17.i
+ %740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; <i1> [#uses=1]
+ br i1 %742, label %bb5.loopexit, label %bb2
+
+bb2: ; preds = %bb2, %do_merge.exit
+ %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] ; <%struct.edge_rec*> [#uses=1]
+ %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 ; <i32> [#uses=1]
+ %744 = xor i32 %743, 32 ; <i32> [#uses=1]
+ %745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %747 = load %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; <i1> [#uses=1]
+ br i1 %750, label %bb5.loopexit, label %bb2
+
+bb4: ; preds = %bb5.loopexit, %bb4
+ %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1]
+ %751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %753 = ptrtoint %struct.edge_rec* %752 to i32 ; <i32> [#uses=1]
+ %754 = xor i32 %753, 32 ; <i32> [#uses=1]
+ %755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %758 = icmp eq %struct.VERTEX* %757, %extra ; <i1> [#uses=1]
+ br i1 %758, label %bb6, label %bb4
+
+bb5.loopexit: ; preds = %bb2, %do_merge.exit
+ %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1]
+ %759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %761 = icmp eq %struct.VERTEX* %760, %extra ; <i1> [#uses=1]
+ br i1 %761, label %bb6, label %bb4
+
+bb6: ; preds = %bb5.loopexit, %bb4
+ %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] ; <%struct.edge_rec*> [#uses=1]
+ %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32 ; <i32> [#uses=1]
+ %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32 ; <i32> [#uses=1]
+ br label %bb15
+
+bb7: ; preds = %bb
+ %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %764 = icmp eq %struct.VERTEX* %763, null ; <i1> [#uses=1]
+ %765 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
+ %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4
+ %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ br i1 %764, label %bb10, label %bb11
+
+bb8: ; preds = %entry
+ %768 = call arm_apcscc i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ call arm_apcscc void @exit(i32 -1) noreturn nounwind
+ unreachable
+
+bb10: ; preds = %bb7
+ store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4
+ %769 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=5]
+ %770 = add i32 %769, 16 ; <i32> [#uses=1]
+ %771 = inttoptr i32 %770 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %772 = add i32 %769, 48 ; <i32> [#uses=1]
+ %773 = inttoptr i32 %772 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4
+ %775 = add i32 %769, 32 ; <i32> [#uses=1]
+ %776 = inttoptr i32 %775 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4
+ %778 = getelementptr %struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4
+ %779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4
+ %780 = xor i32 %769, 32 ; <i32> [#uses=1]
+ br label %bb15
+
+bb11: ; preds = %bb7
+ store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4
+ %781 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=6]
+ %782 = add i32 %781, 16 ; <i32> [#uses=1]
+ %783 = inttoptr i32 %782 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %784 = add i32 %781, 48 ; <i32> [#uses=1]
+ %785 = inttoptr i32 %784 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4
+ %787 = add i32 %781, 32 ; <i32> [#uses=1]
+ %788 = inttoptr i32 %787 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4
+ %790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4
+ %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4
+ %792 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4
+ %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4
+ %795 = ptrtoint %struct.edge_rec* %792 to i32 ; <i32> [#uses=5]
+ %796 = add i32 %795, 16 ; <i32> [#uses=1]
+ %797 = inttoptr i32 %796 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %798 = add i32 %795, 48 ; <i32> [#uses=2]
+ %799 = inttoptr i32 %798 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4
+ %801 = add i32 %795, 32 ; <i32> [#uses=1]
+ %802 = inttoptr i32 %801 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4
+ %804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4
+ %805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4
+ %806 = xor i32 %781, 32 ; <i32> [#uses=1]
+ %807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %810 = ptrtoint %struct.edge_rec* %809 to i32 ; <i32> [#uses=2]
+ %811 = add i32 %810, 16 ; <i32> [#uses=1]
+ %812 = and i32 %811, 63 ; <i32> [#uses=1]
+ %813 = and i32 %810, -64 ; <i32> [#uses=1]
+ %814 = or i32 %812, %813 ; <i32> [#uses=1]
+ %815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %816 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %817 = ptrtoint %struct.edge_rec* %816 to i32 ; <i32> [#uses=2]
+ %818 = add i32 %817, 16 ; <i32> [#uses=1]
+ %819 = and i32 %818, 63 ; <i32> [#uses=1]
+ %820 = and i32 %817, -64 ; <i32> [#uses=1]
+ %821 = or i32 %819, %820 ; <i32> [#uses=1]
+ %822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
+ store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
+ %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
+ store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
+ %829 = xor i32 %795, 32 ; <i32> [#uses=3]
+ %830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %833 = and i32 %798, 63 ; <i32> [#uses=1]
+ %834 = and i32 %795, -64 ; <i32> [#uses=1]
+ %835 = or i32 %833, %834 ; <i32> [#uses=1]
+ %836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %838 = load %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %839 = ptrtoint %struct.edge_rec* %838 to i32 ; <i32> [#uses=2]
+ %840 = add i32 %839, 16 ; <i32> [#uses=1]
+ %841 = and i32 %840, 63 ; <i32> [#uses=1]
+ %842 = and i32 %839, -64 ; <i32> [#uses=1]
+ %843 = or i32 %841, %842 ; <i32> [#uses=1]
+ %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %846 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7]
+ store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
+ %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4
+ %849 = ptrtoint %struct.edge_rec* %846 to i32 ; <i32> [#uses=6]
+ %850 = add i32 %849, 16 ; <i32> [#uses=2]
+ %851 = inttoptr i32 %850 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %852 = add i32 %849, 48 ; <i32> [#uses=1]
+ %853 = inttoptr i32 %852 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4
+ %855 = add i32 %849, 32 ; <i32> [#uses=1]
+ %856 = inttoptr i32 %855 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4
+ %858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
+ %859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
+ %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %861 = ptrtoint %struct.edge_rec* %860 to i32 ; <i32> [#uses=2]
+ %862 = add i32 %861, 16 ; <i32> [#uses=1]
+ %863 = and i32 %862, 63 ; <i32> [#uses=1]
+ %864 = and i32 %861, -64 ; <i32> [#uses=1]
+ %865 = or i32 %863, %864 ; <i32> [#uses=1]
+ %866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %869 = ptrtoint %struct.edge_rec* %868 to i32 ; <i32> [#uses=2]
+ %870 = add i32 %869, 16 ; <i32> [#uses=1]
+ %871 = and i32 %870, 63 ; <i32> [#uses=1]
+ %872 = and i32 %869, -64 ; <i32> [#uses=1]
+ %873 = or i32 %871, %872 ; <i32> [#uses=1]
+ %874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
+ store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
+ %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
+ store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
+ %881 = xor i32 %849, 32 ; <i32> [#uses=3]
+ %882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6]
+ %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %885 = ptrtoint %struct.edge_rec* %884 to i32 ; <i32> [#uses=2]
+ %886 = add i32 %885, 16 ; <i32> [#uses=1]
+ %887 = and i32 %886, 63 ; <i32> [#uses=1]
+ %888 = and i32 %885, -64 ; <i32> [#uses=1]
+ %889 = or i32 %887, %888 ; <i32> [#uses=1]
+ %890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %891 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %892 = ptrtoint %struct.edge_rec* %891 to i32 ; <i32> [#uses=2]
+ %893 = add i32 %892, 16 ; <i32> [#uses=1]
+ %894 = and i32 %893, 63 ; <i32> [#uses=1]
+ %895 = and i32 %892, -64 ; <i32> [#uses=1]
+ %896 = or i32 %894, %895 ; <i32> [#uses=1]
+ %897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
+ store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
+ %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
+ store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
+ %904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %905 = load double* %904, align 4 ; <double> [#uses=2]
+ %906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %907 = load double* %906, align 4 ; <double> [#uses=2]
+ %908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %909 = load double* %908, align 4 ; <double> [#uses=3]
+ %910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %911 = load double* %910, align 4 ; <double> [#uses=3]
+ %912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %913 = load double* %912, align 4 ; <double> [#uses=3]
+ %914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %915 = load double* %914, align 4 ; <double> [#uses=3]
+ %916 = fsub double %905, %913 ; <double> [#uses=1]
+ %917 = fsub double %911, %915 ; <double> [#uses=1]
+ %918 = fmul double %916, %917 ; <double> [#uses=1]
+ %919 = fsub double %909, %913 ; <double> [#uses=1]
+ %920 = fsub double %907, %915 ; <double> [#uses=1]
+ %921 = fmul double %919, %920 ; <double> [#uses=1]
+ %922 = fsub double %918, %921 ; <double> [#uses=1]
+ %923 = fcmp ogt double %922, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %923, label %bb15, label %bb13
+
+bb13: ; preds = %bb11
+ %924 = fsub double %905, %909 ; <double> [#uses=1]
+ %925 = fsub double %915, %911 ; <double> [#uses=1]
+ %926 = fmul double %924, %925 ; <double> [#uses=1]
+ %927 = fsub double %913, %909 ; <double> [#uses=1]
+ %928 = fsub double %907, %911 ; <double> [#uses=1]
+ %929 = fmul double %927, %928 ; <double> [#uses=1]
+ %930 = fsub double %926, %929 ; <double> [#uses=1]
+ %931 = fcmp ogt double %930, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %931, label %bb15, label %bb14
+
+bb14: ; preds = %bb13
+ %932 = and i32 %850, 63 ; <i32> [#uses=1]
+ %933 = and i32 %849, -64 ; <i32> [#uses=3]
+ %934 = or i32 %932, %933 ; <i32> [#uses=1]
+ %935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %938 = ptrtoint %struct.edge_rec* %937 to i32 ; <i32> [#uses=2]
+ %939 = add i32 %938, 16 ; <i32> [#uses=1]
+ %940 = and i32 %939, 63 ; <i32> [#uses=1]
+ %941 = and i32 %938, -64 ; <i32> [#uses=1]
+ %942 = or i32 %940, %941 ; <i32> [#uses=1]
+ %943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %945 = ptrtoint %struct.edge_rec* %944 to i32 ; <i32> [#uses=2]
+ %946 = add i32 %945, 16 ; <i32> [#uses=1]
+ %947 = and i32 %946, 63 ; <i32> [#uses=1]
+ %948 = and i32 %945, -64 ; <i32> [#uses=1]
+ %949 = or i32 %947, %948 ; <i32> [#uses=1]
+ %950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %953 = ptrtoint %struct.edge_rec* %952 to i32 ; <i32> [#uses=2]
+ %954 = add i32 %953, 16 ; <i32> [#uses=1]
+ %955 = and i32 %954, 63 ; <i32> [#uses=1]
+ %956 = and i32 %953, -64 ; <i32> [#uses=1]
+ %957 = or i32 %955, %956 ; <i32> [#uses=1]
+ %958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
+ store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
+ %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
+ store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
+ %965 = add i32 %881, 16 ; <i32> [#uses=1]
+ %966 = and i32 %965, 63 ; <i32> [#uses=1]
+ %967 = or i32 %966, %933 ; <i32> [#uses=1]
+ %968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %970 = load %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %971 = ptrtoint %struct.edge_rec* %970 to i32 ; <i32> [#uses=2]
+ %972 = add i32 %971, 16 ; <i32> [#uses=1]
+ %973 = and i32 %972, 63 ; <i32> [#uses=1]
+ %974 = and i32 %971, -64 ; <i32> [#uses=1]
+ %975 = or i32 %973, %974 ; <i32> [#uses=1]
+ %976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %978 = ptrtoint %struct.edge_rec* %977 to i32 ; <i32> [#uses=2]
+ %979 = add i32 %978, 16 ; <i32> [#uses=1]
+ %980 = and i32 %979, 63 ; <i32> [#uses=1]
+ %981 = and i32 %978, -64 ; <i32> [#uses=1]
+ %982 = or i32 %980, %981 ; <i32> [#uses=1]
+ %983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %986 = ptrtoint %struct.edge_rec* %985 to i32 ; <i32> [#uses=2]
+ %987 = add i32 %986, 16 ; <i32> [#uses=1]
+ %988 = and i32 %987, 63 ; <i32> [#uses=1]
+ %989 = and i32 %986, -64 ; <i32> [#uses=1]
+ %990 = or i32 %988, %989 ; <i32> [#uses=1]
+ %991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
+ store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
+ %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
+ store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
+ %998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
+ store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
+ br label %bb15
+
+bb15: ; preds = %bb14, %bb13, %bb11, %bb10, %bb6
+ %retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ] ; <i32> [#uses=1]
+ %retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ] ; <i32> [#uses=1]
+ %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64* ; <i64*> [#uses=1]
+ %1001 = zext i32 %retval.0.0 to i64 ; <i64> [#uses=1]
+ %1002 = zext i32 %retval.1.0 to i64 ; <i64> [#uses=1]
+ %1003 = shl i64 %1002, 32 ; <i64> [#uses=1]
+ %1004 = or i64 %1003, %1001 ; <i64> [#uses=1]
+ store i64 %1004, i64* %agg.result162, align 4
+ ret void
+}
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc void @exit(i32) noreturn nounwind
+
+declare arm_apcscc %struct.edge_rec* @alloc_edge() nounwind
diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
new file mode 100644
index 0000000..b4b989b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ store i16 %parts, i16* undef, align 4
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ br i1 undef, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ %0 = load i16* undef, align 4 ; <i16> [#uses=1]
+ %1 = icmp eq i16 %0, 0 ; <i1> [#uses=1]
+ %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; <i8*> [#uses=1]
+ %2 = tail call arm_apcscc i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; <i32> [#uses=0]
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
new file mode 100644
index 0000000..24f4990
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -march=arm
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ %0 = getelementptr i8* null, i32 10 ; <i8*> [#uses=1]
+ %1 = bitcast i8* %0 to i16* ; <i16*> [#uses=1]
+ %2 = load i16* %1, align 2 ; <i16> [#uses=1]
+ %3 = add i16 %2, 1 ; <i16> [#uses=1]
+ %4 = zext i16 %3 to i32 ; <i32> [#uses=1]
+ %5 = mul i32 %4, 3 ; <i32> [#uses=1]
+ %6 = add i32 %5, -1 ; <i32> [#uses=1]
+ %7 = icmp eq i32 %6, undef ; <i1> [#uses=1]
+ br i1 %7, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
new file mode 100644
index 0000000..e1d19d1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 -mattr=+vfp3
+
+@a = external global double ; <double*> [#uses=1]
+
+declare double @llvm.exp.f64(double) nounwind readonly
+
+define arm_apcscc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb28, label %bb
+
+bb28: ; preds = %bb
+ %0 = load double* @a, align 4 ; <double> [#uses=2]
+ %1 = fadd double %0, undef ; <double> [#uses=2]
+ br i1 undef, label %bb59, label %bb60
+
+bb59: ; preds = %bb28
+ %2 = fsub double -0.000000e+00, undef ; <double> [#uses=2]
+ br label %bb61
+
+bb60: ; preds = %bb28
+ %3 = tail call double @llvm.exp.f64(double undef) nounwind ; <double> [#uses=1]
+ %4 = fsub double -0.000000e+00, %3 ; <double> [#uses=2]
+ %5 = fsub double -0.000000e+00, undef ; <double> [#uses=1]
+ %6 = fsub double -0.000000e+00, undef ; <double> [#uses=1]
+ br label %bb61
+
+bb61: ; preds = %bb60, %bb59
+ %.pn201 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn111 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn452 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn85 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn238 = phi double [ 0.000000e+00, %bb59 ], [ 0.000000e+00, %bb60 ] ; <double> [#uses=1]
+ %.pn39 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn230 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn228 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn224 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn222 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn218 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1]
+ %.pn214 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn212 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1]
+ %.pn213 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn210 = phi double [ undef, %bb59 ], [ %5, %bb60 ] ; <double> [#uses=1]
+ %.pn202 = phi double [ undef, %bb59 ], [ %6, %bb60 ] ; <double> [#uses=0]
+ %.pn390 = fdiv double %.pn452, undef ; <double> [#uses=0]
+ %.pn145 = fdiv double %.pn238, %1 ; <double> [#uses=0]
+ %.pn138 = fdiv double %.pn230, undef ; <double> [#uses=1]
+ %.pn139 = fdiv double %.pn228, undef ; <double> [#uses=1]
+ %.pn134 = fdiv double %.pn224, %0 ; <double> [#uses=1]
+ %.pn135 = fdiv double %.pn222, %1 ; <double> [#uses=1]
+ %.pn133 = fdiv double %.pn218, undef ; <double> [#uses=0]
+ %.pn128 = fdiv double %.pn214, undef ; <double> [#uses=1]
+ %.pn129 = fdiv double %.pn212, %.pn213 ; <double> [#uses=1]
+ %.pn126 = fdiv double %.pn210, undef ; <double> [#uses=0]
+ %.pn54.in = fmul double undef, %.pn201 ; <double> [#uses=1]
+ %.pn42.in = fmul double undef, undef ; <double> [#uses=1]
+ %.pn76 = fsub double %.pn138, %.pn139 ; <double> [#uses=1]
+ %.pn74 = fsub double %.pn134, %.pn135 ; <double> [#uses=1]
+ %.pn70 = fsub double %.pn128, %.pn129 ; <double> [#uses=1]
+ %.pn54 = fdiv double %.pn54.in, 6.000000e+00 ; <double> [#uses=1]
+ %.pn64 = fmul double undef, 0x3FE5555555555555 ; <double> [#uses=1]
+ %.pn65 = fmul double undef, undef ; <double> [#uses=1]
+ %.pn50 = fmul double undef, %.pn111 ; <double> [#uses=0]
+ %.pn42 = fdiv double %.pn42.in, 6.000000e+00 ; <double> [#uses=1]
+ %.pn40 = fmul double undef, %.pn85 ; <double> [#uses=0]
+ %.pn56 = fadd double %.pn76, undef ; <double> [#uses=1]
+ %.pn57 = fmul double %.pn74, undef ; <double> [#uses=1]
+ %.pn36 = fadd double undef, undef ; <double> [#uses=1]
+ %.pn37 = fmul double %.pn70, undef ; <double> [#uses=1]
+ %.pn33 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1]
+ %.pn29 = fsub double %.pn64, %.pn65 ; <double> [#uses=1]
+ %.pn21 = fadd double undef, undef ; <double> [#uses=1]
+ %.pn27 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1]
+ %.pn11 = fadd double %.pn56, %.pn57 ; <double> [#uses=1]
+ %.pn32 = fmul double %.pn54, undef ; <double> [#uses=1]
+ %.pn26 = fmul double %.pn42, undef ; <double> [#uses=1]
+ %.pn15 = fmul double 0.000000e+00, %.pn39 ; <double> [#uses=1]
+ %.pn7 = fadd double %.pn36, %.pn37 ; <double> [#uses=1]
+ %.pn30 = fsub double %.pn32, %.pn33 ; <double> [#uses=1]
+ %.pn28 = fadd double %.pn30, 0.000000e+00 ; <double> [#uses=1]
+ %.pn24 = fsub double %.pn28, %.pn29 ; <double> [#uses=1]
+ %.pn22 = fsub double %.pn26, %.pn27 ; <double> [#uses=1]
+ %.pn20 = fadd double %.pn24, undef ; <double> [#uses=1]
+ %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1]
+ %.pn16 = fsub double %.pn20, %.pn21 ; <double> [#uses=1]
+ %.pn14 = fsub double %.pn18, undef ; <double> [#uses=1]
+ %.pn12 = fadd double %.pn16, undef ; <double> [#uses=1]
+ %.pn10 = fadd double %.pn14, %.pn15 ; <double> [#uses=1]
+ %.pn8 = fsub double %.pn12, undef ; <double> [#uses=1]
+ %.pn6 = fsub double %.pn10, %.pn11 ; <double> [#uses=1]
+ %.pn4 = fadd double %.pn8, undef ; <double> [#uses=1]
+ %.pn2 = fadd double %.pn6, %.pn7 ; <double> [#uses=1]
+ %N1.0 = fsub double %.pn4, undef ; <double> [#uses=1]
+ %D1.0 = fsub double %.pn2, undef ; <double> [#uses=2]
+ br i1 undef, label %bb62, label %bb64
+
+bb62: ; preds = %bb61
+ %7 = fadd double %D1.0, undef ; <double> [#uses=1]
+ br label %bb64
+
+bb64: ; preds = %bb62, %bb61
+ %.pn = phi double [ undef, %bb62 ], [ %N1.0, %bb61 ] ; <double> [#uses=1]
+ %.pn1 = phi double [ %7, %bb62 ], [ %D1.0, %bb61 ] ; <double> [#uses=1]
+ %x.1 = fdiv double %.pn, %.pn1 ; <double> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
new file mode 100644
index 0000000..2d4e58d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+; PR4657
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
+entry:
+ %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %f_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %0 = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <4 x i32> %v, <4 x i32>* %v_addr
+ store i32 %f, i32* %f_addr
+ %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1]
+ %2 = load i32* %f_addr, align 4 ; <i32> [#uses=1]
+ %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1]
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1]
+ %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %5, <4 x i32>* %0, align 16
+ %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %6, <4 x i32>* %retval, align 16
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %retval1
+}
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
new file mode 100644
index 0000000..65ffed2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-elf"
+
+define arm_aapcscc i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1]
+ %0 = icmp eq i32 %asmtmp.i, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb6.i, label %fault_in_pages_writeable.exit
+
+bb6.i: ; preds = %bb5.i
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb7.i
+
+bb7.i: ; preds = %bb6.i
+ unreachable
+
+fault_in_pages_writeable.exit: ; preds = %bb6.i, %bb5.i, %entry
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %1 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
new file mode 100644
index 0000000..9e5372a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+define arm_aapcscc i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1]
+ br label %fault_in_pages_writeable.exit
+
+fault_in_pages_writeable.exit: ; preds = %bb5.i, %entry
+ %0 = phi i32 [ 0, %entry ], [ %asmtmp.i, %bb5.i ] ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %2 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
new file mode 100644
index 0000000..18d68f7
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=arm
+; PR4528
+
+; Inline asm is allowed to contain operands "=&r", "0".
+
+%struct.device_dma_parameters = type { i32, i32 }
+%struct.iovec = type { i8*, i32 }
+
+define arm_aapcscc i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize {
+entry:
+ br label %bb8
+
+bb: ; preds = %bb8
+ br i1 undef, label %bb10, label %bb2
+
+bb2: ; preds = %bb
+ %asmtmp = tail call %struct.device_dma_parameters asm "adds $1, $2, $3; sbcccs $1, $1, $0; movcc $0, #0", "=&r,=&r,r,Ir,0,~{cc}"(i8* undef, i32 undef, i32 0) nounwind; <%struct.device_dma_parameters> [#uses=1]
+ %asmresult = extractvalue %struct.device_dma_parameters %asmtmp, 0; <i32> [#uses=1]
+ %0 = icmp eq i32 %asmresult, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb7, label %bb4
+
+bb4: ; preds = %bb2
+ br i1 undef, label %bb10, label %bb9
+
+bb7: ; preds = %bb2
+ %1 = add i32 %2, 1 ; <i32> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb7, %entry
+ %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; <i32> [#uses=3]
+ %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0]
+ %3 = load i32* %nr_segs, align 4 ; <i32> [#uses=1]
+ %4 = icmp ult i32 %2, %3 ; <i1> [#uses=1]
+ br i1 %4, label %bb, label %bb9
+
+bb9: ; preds = %bb8, %bb4
+ store i32 undef, i32* %count, align 4
+ ret i32 0
+
+bb10: ; preds = %bb4, %bb
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
new file mode 100644
index 0000000..a46482c
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm
+; PR4716
+
+define arm_aapcscc void @_start() nounwind naked {
+entry:
+ tail call arm_aapcscc void @exit(i32 undef) noreturn nounwind
+ unreachable
+}
+
+declare arm_aapcscc void @exit(i32) noreturn nounwind
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
new file mode 100644
index 0000000..84915c4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 -post-RA-scheduler -mcpu=cortex-a8
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
+@g = common global %struct.tree* null
+
+define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
+entry:
+ %t.idx51.val.i = load double* null ; <double> [#uses=1]
+ br i1 undef, label %bb4.i, label %bb.i
+
+bb.i: ; preds = %entry
+ unreachable
+
+bb4.i: ; preds = %entry
+ %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
+ %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
+ %.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1]
+ %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
+ %.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1]
+ %1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2]
+ %2 = fmul double %1, %1 ; <double> [#uses=1]
+ %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2]
+ %4 = fmul double %3, %3 ; <double> [#uses=1]
+ %5 = fadd double %2, %4 ; <double> [#uses=1]
+ %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1]
+ br i1 undef, label %bb7.i4, label %bb6.i
+
+bb6.i: ; preds = %bb4.i
+ br label %bb7.i4
+
+bb7.i4: ; preds = %bb6.i, %bb4.i
+ %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0]
+ unreachable
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
new file mode 100644
index 0000000..a21ffc3
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+%struct.icstruct = type { [3 x i32], i16 }
+%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+declare arm_apcscc double @floor(double) nounwind readnone
+
+define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
+entry:
+ br i1 %a, label %bb3, label %bb1
+
+bb1: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ br i1 %a, label %bb7, label %bb5
+
+bb5: ; preds = %bb3
+ unreachable
+
+bb7: ; preds = %bb3
+ br i1 %a, label %bb11, label %bb9
+
+bb9: ; preds = %bb7
+ %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb7
+ %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %1
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
new file mode 100644
index 0000000..e3d8ea6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+%struct.Patient = type { i32, i32, i32, %struct.Village* }
+%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
+entry:
+ br i1 %p, label %bb8, label %bb1
+
+bb1: ; preds = %entry
+ %0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
+ %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
+ %.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
+ store i32 %.c, i32* null
+ %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %1
+ %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %2
+ ret %struct.Village* %0
+
+bb8: ; preds = %entry
+ ret %struct.Village* null
+}
+
+declare double @ldexp(double, i32)
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
new file mode 100644
index 0000000..9123377
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+@.str = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=0]
+@.str1 = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1]
+@.str2 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ %0 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+ %1 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+ %2 = tail call arm_apcscc i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+ %3 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000..0fad533
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
new file mode 100644
index 0000000..c6ef256
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+define arm_apcscc void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) {
+entry:
+ %0 = lshr <4 x i32> zeroinitializer, <i32 31, i32 31, i32 31, i32 31> ; <<4 x i32>> [#uses=1]
+ %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; <<2 x i32>> [#uses=1]
+ %2 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> undef, <2 x i32> %1) nounwind ; <<2 x i32>> [#uses=1]
+ %3 = extractelement <2 x i32> %2, i32 0 ; <i32> [#uses=1]
+ %not..i = icmp eq i32 %3, undef ; <i1> [#uses=1]
+ br i1 %not..i, label %return, label %bb221
+
+bb221: ; preds = %bb221, %entry
+ br label %bb221
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
new file mode 100644
index 0000000..bc5bfe9
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+define arm_apcscc void @aaaa(%quuz* %this, i8* %block) {
+entry:
+ br i1 undef, label %bb.nph269, label %bb201
+
+bb.nph269: ; preds = %entry
+ br label %bb12
+
+bb12: ; preds = %bb194, %bb.nph269
+ %0 = fmul <4 x float> undef, undef ; <<4 x float>> [#uses=1]
+ %1 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %3 = fadd <4 x float> undef, %2 ; <<4 x float>> [#uses=1]
+ br i1 undef, label %bb194, label %bb186
+
+bb186: ; preds = %bb12
+ br label %bb194
+
+bb194: ; preds = %bb186, %bb12
+ %besterror.0.0 = phi <4 x float> [ %3, %bb186 ], [ undef, %bb12 ] ; <<4 x float>> [#uses=0]
+ %indvar.next294 = add i32 undef, 1 ; <i32> [#uses=0]
+ br label %bb12
+
+bb201: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
new file mode 100644
index 0000000..d5178b4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() nounwind {
+entry:
+ %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1]
+ %tmp28 = extractelement <2 x float> %0, i32 0 ; <float> [#uses=1]
+ %1 = fcmp une float %tmp28, 4.900000e+01 ; <i1> [#uses=1]
+ br i1 %1, label %bb, label %bb7
+
+bb: ; preds = %entry
+ unreachable
+
+bb7: ; preds = %entry
+ br i1 undef, label %bb8, label %bb9
+
+bb8: ; preds = %bb7
+ unreachable
+
+bb9: ; preds = %bb7
+ ret void
+}
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
new file mode 100644
index 0000000..266fce6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @aaa() nounwind {
+entry:
+ %0 = fmul <4 x float> undef, <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02, float 0x3EB0C6F7A0000000> ; <<4 x float>> [#uses=1]
+ %tmp31 = extractelement <4 x float> %0, i32 0 ; <float> [#uses=1]
+ %1 = fpext float %tmp31 to double ; <double> [#uses=1]
+ %2 = fsub double 1.000000e+00, %1 ; <double> [#uses=1]
+ %3 = fdiv double %2, 1.000000e+00 ; <double> [#uses=1]
+ %4 = tail call double @fabs(double %3) nounwind readnone ; <double> [#uses=1]
+ %5 = fcmp ogt double %4, 1.000000e-05 ; <i1> [#uses=1]
+ br i1 %5, label %bb, label %bb7
+
+bb: ; preds = %entry
+ unreachable
+
+bb7: ; preds = %entry
+ unreachable
+}
+
+declare double @fabs(double)
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
new file mode 100644
index 0000000..b6cf880
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
+
+%struct.A = type { i32* }
+
+define arm_apcscc void @"\01-[MyFunction Name:]"() {
+entry:
+ %save_filt.1 = alloca i32 ; <i32*> [#uses=2]
+ %save_eptr.0 = alloca i8* ; <i8**> [#uses=2]
+ %a = alloca %struct.A ; <%struct.A*> [#uses=3]
+ %eh_exception = alloca i8* ; <i8**> [#uses=5]
+ %eh_selector = alloca i32 ; <i32*> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call arm_apcscc void @_ZN1AC1Ev(%struct.A* %a)
+ invoke arm_apcscc void @_Z3barv()
+ to label %invcont unwind label %lpad
+
+invcont: ; preds = %entry
+ call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ br label %return
+
+bb: ; preds = %ppad
+ %eh_select = load i32* %eh_selector ; <i32> [#uses=1]
+ store i32 %eh_select, i32* %save_filt.1, align 4
+ %eh_value = load i8** %eh_exception ; <i8*> [#uses=1]
+ store i8* %eh_value, i8** %save_eptr.0, align 4
+ call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ %0 = load i8** %save_eptr.0, align 4 ; <i8*> [#uses=1]
+ store i8* %0, i8** %eh_exception, align 4
+ %1 = load i32* %save_filt.1, align 4 ; <i32> [#uses=1]
+ store i32 %1, i32* %eh_selector, align 4
+ br label %Unwind
+
+return: ; preds = %invcont
+ ret void
+
+lpad: ; preds = %entry
+ %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr1 = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+ store i32 %eh_select2, i32* %eh_selector
+ br label %ppad
+
+ppad: ; preds = %lpad
+ br label %bb
+
+Unwind: ; preds = %bb
+ %eh_ptr3 = load i8** %eh_exception ; <i8*> [#uses=1]
+ call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+ unreachable
+}
+
+define linkonce_odr arm_apcscc void @_ZN1AC1Ev(%struct.A* %this) {
+entry:
+ %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store %struct.A* %this, %struct.A** %this_addr
+ %0 = call arm_apcscc i8* @_Znwm(i32 4) ; <i8*> [#uses=1]
+ %1 = bitcast i8* %0 to i32* ; <i32*> [#uses=1]
+ %2 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
+ %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+ store i32* %1, i32** %3, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare arm_apcscc i8* @_Znwm(i32)
+
+define linkonce_odr arm_apcscc void @_ZN1AD1Ev(%struct.A* %this) nounwind {
+entry:
+ %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store %struct.A* %this, %struct.A** %this_addr
+ %0 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
+ %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
+ %2 = load i32** %1, align 4 ; <i32*> [#uses=1]
+ %3 = bitcast i32* %2 to i8* ; <i8*> [#uses=1]
+ call arm_apcscc void @_ZdlPv(i8* %3) nounwind
+ br label %bb
+
+bb: ; preds = %entry
+ br label %return
+
+return: ; preds = %bb
+ ret void
+}
+;CHECK: L_LSDA_1:
+
+declare arm_apcscc void @_ZdlPv(i8*) nounwind
+
+declare arm_apcscc void @_Z3barv()
+
+declare i8* @llvm.eh.exception() nounwind
+
+declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+
+declare arm_apcscc i32 @__gxx_personality_sj0(...)
+
+declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
new file mode 100644
index 0000000..e1e60e6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4843
+define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
+;CHECK: v2regbug:
+;CHECK: vzip.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
+ ret <4 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
new file mode 100644
index 0000000..bf91fe0
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,106 @@
+; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-apple-darwin9"
+
+@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
+@nodes = internal global i64 0 ; <i64*> [#uses=4]
+@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
+@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
+@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
+@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
+@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
+@.str6 = private constant [28 x i8] c"score = %d (%c) work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
+@plycnt = internal global i32 0 ; <i32*> [#uses=21]
+@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
+@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
+@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
+@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
+@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
+@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
+@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
+@he = internal global i8* null ; <i8**> [#uses=9]
+@hits = internal global i64 0 ; <i64*> [#uses=8]
+@posed = internal global i64 0 ; <i64*> [#uses=7]
+@ht = internal global i32* null ; <i32**> [#uses=5]
+@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
+@.str117 = private constant [45 x i8] c"- %5.3f < %5.3f = %5.3f > %5.3f + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
+@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @getchar() nounwind
+
+define internal arm_apcscc i32 @transpose() nounwind readonly {
+; CHECK: push
+entry:
+ %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+ %1 = shl i32 %0, 7 ; <i32> [#uses=1]
+ %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+ %3 = or i32 %1, %2 ; <i32> [#uses=1]
+ %4 = shl i32 %3, 7 ; <i32> [#uses=1]
+ %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+ %6 = or i32 %4, %5 ; <i32> [#uses=3]
+ %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+ %8 = shl i32 %7, 7 ; <i32> [#uses=1]
+ %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+ %10 = or i32 %8, %9 ; <i32> [#uses=1]
+ %11 = shl i32 %10, 7 ; <i32> [#uses=1]
+ %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+ %13 = or i32 %11, %12 ; <i32> [#uses=3]
+ %14 = icmp ugt i32 %6, %13 ; <i1> [#uses=2]
+ %.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; <i32> [#uses=1]
+ %.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; <i32> [#uses=1]
+ %.pn2.i = shl i32 %.pn2.in.i, 7 ; <i32> [#uses=1]
+ %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+ %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; <i32> [#uses=1]
+ %.pn.in.i = zext i32 %.pn.in.in.i to i64 ; <i64> [#uses=1]
+ %.pn.i = shl i64 %.pn.in.i, 21 ; <i64> [#uses=1]
+ %.pn1.i = zext i32 %.pn1.in.i to i64 ; <i64> [#uses=1]
+ %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i ; <i64> [#uses=2]
+ %15 = lshr i64 %iftmp.22.0.i, 17 ; <i64> [#uses=1]
+ %16 = trunc i64 %15 to i32 ; <i32> [#uses=2]
+ %17 = urem i64 %iftmp.22.0.i, 1050011 ; <i64> [#uses=1]
+ %18 = trunc i64 %17 to i32 ; <i32> [#uses=1]
+ %19 = urem i32 %16, 179 ; <i32> [#uses=1]
+ %20 = or i32 %19, 131072 ; <i32> [#uses=1]
+ %21 = load i32** @ht, align 4 ; <i32*> [#uses=1]
+ br label %bb5
+
+bb: ; preds = %bb5
+ %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+ %23 = load i32* %22, align 4 ; <i32> [#uses=1]
+ %24 = icmp eq i32 %23, %16 ; <i1> [#uses=1]
+ br i1 %24, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ %25 = load i8** @he, align 4 ; <i8*> [#uses=1]
+ %26 = getelementptr inbounds i8* %25, i32 %x.0 ; <i8*> [#uses=1]
+ %27 = load i8* %26, align 1 ; <i8> [#uses=1]
+ %28 = sext i8 %27 to i32 ; <i32> [#uses=1]
+ ret i32 %28
+
+bb2: ; preds = %bb
+ %29 = add nsw i32 %20, %x.0 ; <i32> [#uses=3]
+ %30 = add i32 %29, -1050011 ; <i32> [#uses=1]
+ %31 = icmp sgt i32 %29, 1050010 ; <i1> [#uses=1]
+ %. = select i1 %31, i32 %30, i32 %29 ; <i32> [#uses=1]
+ %32 = add i32 %33, 1 ; <i32> [#uses=1]
+ br label %bb5
+
+bb5: ; preds = %bb2, %entry
+ %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ] ; <i32> [#uses=2]
+ %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ] ; <i32> [#uses=3]
+ %34 = icmp sgt i32 %33, 7 ; <i1> [#uses=1]
+ br i1 %34, label %bb7, label %bb
+
+bb7: ; preds = %bb5
+ ret i32 -128
+}
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
new file mode 100644
index 0000000..f654a16
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mattr=+neon < %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() {
+entry:
+ %0 = insertelement <4 x i32> undef, i32 -1, i32 3
+ store <4 x i32> %0, <4 x i32>* undef, align 16
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
new file mode 100644
index 0000000..98cab9a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s
+; pr4939
+
+define void @test(double* %x, double* %y) nounwind {
+ %1 = load double* %x, align 4
+ %2 = load double* %y, align 4
+ %3 = fsub double -0.000000e+00, %1
+ %4 = fcmp ugt double %2, %3
+ br i1 %4, label %bb1, label %bb2
+
+bb1:
+;CHECK: fstdhi
+ store double %1, double* %y, align 4
+ br label %bb2
+
+bb2:
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll
new file mode 100644
index 0000000..10653b5
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-10-postdec.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+; Radar 7213850
+
+define i32 @test(i8* %d, i32 %x, i32 %y) nounwind {
+ %1 = ptrtoint i8* %d to i32
+;CHECK: sub
+ %2 = sub i32 %x, %1
+ %3 = add nsw i32 %2, %y
+ store i8 0, i8* %d, align 1
+ ret i32 %3
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
new file mode 100644
index 0000000..13adb24
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mattr=+neon < %s
+; PR4965
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%struct.fr = type { [6 x %struct.pl] }
+%struct.obb = type { %"struct.m4", %"struct.p3" }
+%struct.pl = type { %"struct.p3" }
+%"struct.m4" = type { %"struct.p3", %"struct.p3", %"struct.p3", %"struct.p3" }
+%"struct.p3" = type { <4 x float> }
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind {
+entry:
+ %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ %val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
+ %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1]
+ %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb33
+
+bb: ; preds = %bb33
+ %2 = fmul <4 x float> %val.i.i, undef ; <<4 x float>> [#uses=1]
+ %3 = fmul <4 x float> %val2.i.i, undef ; <<4 x float>> [#uses=1]
+ %4 = fadd <4 x float> %3, %2 ; <<4 x float>> [#uses=1]
+ %5 = fmul <4 x float> %val4.i.i, undef ; <<4 x float>> [#uses=1]
+ %6 = fadd <4 x float> %5, %4 ; <<4 x float>> [#uses=1]
+ %7 = bitcast <4 x float> %6 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %8 = and <4 x i32> %7, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
+ %9 = or <4 x i32> %8, undef ; <<4 x i32>> [#uses=1]
+ %10 = bitcast <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = shufflevector <4 x float> %10, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %12 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> undef, %12 ; <<4 x float>> [#uses=1]
+ %14 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=1]
+ %15 = fadd <4 x float> %14, %13 ; <<4 x float>> [#uses=1]
+ %16 = fadd <4 x float> undef, %15 ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %1, %16 ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> zeroinitializer, %17 ; <<4 x float>> [#uses=1]
+ %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=2]
+ %20 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %21 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %22 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %20, <2 x float> %21) nounwind ; <<2 x float>> [#uses=2]
+ %23 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %22, <2 x float> %22) nounwind ; <<2 x float>> [#uses=2]
+ %24 = shufflevector <2 x float> %23, <2 x float> %23, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %25 = fadd <4 x float> %24, zeroinitializer ; <<4 x float>> [#uses=1]
+ %tmp46 = extractelement <4 x float> %25, i32 0 ; <float> [#uses=1]
+ %26 = fcmp olt float %tmp46, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %26, label %bb41, label %bb33
+
+bb33: ; preds = %bb, %entry
+ br i1 undef, label %bb34, label %bb
+
+bb34: ; preds = %bb33
+ ret i8 undef
+
+bb41: ; preds = %bb
+ ret i8 1
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
new file mode 100644
index 0000000..758b59a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9
+
+define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+ %1 = ptrtoint i8* %pBuffer to i32
+
+ %lsr.iv2641 = inttoptr i32 %1 to float*
+ %tmp29 = add i32 %1, 4
+ %tmp2930 = inttoptr i32 %tmp29 to float*
+ %tmp31 = add i32 %1, 8
+ %tmp3132 = inttoptr i32 %tmp31 to float*
+ %tmp33 = add i32 %1, 12
+ %tmp3334 = inttoptr i32 %tmp33 to float*
+ %tmp35 = add i32 %1, 16
+ %tmp3536 = inttoptr i32 %tmp35 to float*
+ %tmp37 = add i32 %1, 20
+ %tmp3738 = inttoptr i32 %tmp37 to float*
+ %tmp39 = add i32 %1, 24
+ %tmp3940 = inttoptr i32 %tmp39 to float*
+ %2 = load float* %lsr.iv2641, align 4
+ %3 = load float* %tmp2930, align 4
+ %4 = load float* %tmp3132, align 4
+ %5 = load float* %tmp3334, align 4
+ %6 = load float* %tmp3536, align 4
+ %7 = load float* %tmp3738, align 4
+ %8 = load float* %tmp3940, align 4
+ %9 = insertelement <4 x float> undef, float %6, i32 0
+ %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer
+ %11 = insertelement <4 x float> %10, float %7, i32 1
+ %12 = insertelement <4 x float> %11, float %8, i32 2
+ %13 = insertelement <4 x float> undef, float %2, i32 0
+ %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer
+ %15 = insertelement <4 x float> %14, float %3, i32 1
+ %16 = insertelement <4 x float> %15, float %4, i32 2
+ %17 = insertelement <4 x float> %16, float %5, i32 3
+ %18 = fsub <4 x float> zeroinitializer, %12
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer
+ %20 = shufflevector <4 x float> %17, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %21 = shufflevector <2 x float> %20, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+
+ ret <4 x float> %21
+}
diff --git a/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
new file mode 100644
index 0000000..980f8ce
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9
+
+; PR4986
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb.preheader
+
+bb.preheader: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb, %bb.preheader
+ %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %4 = fmul <4 x float> undef, %3 ; <<4 x float>> [#uses=1]
+ %5 = extractelement <4 x float> %4, i32 3 ; <float> [#uses=1]
+ store float %5, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc <4 x float> @bar(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+ %1 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float undef, i32 2 ; <<4 x float>> [#uses=1]
+ %4 = insertelement <4 x float> %3, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ ret <4 x float> %6
+}
diff --git a/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
new file mode 100644
index 0000000..aace475
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { <4 x float> }
+%foo = type { %bar, %bar, %bar, %bar }
+
+declare arm_aapcs_vfpcc <4 x float> @bbb(%bar*) nounwind
+
+define arm_aapcs_vfpcc void @aaa(%foo* noalias sret %agg.result, %foo* %tfrm) nounwind {
+entry:
+ %0 = call arm_aapcs_vfpcc <4 x float> @bbb(%bar* undef) nounwind ; <<4 x float>> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
new file mode 100644
index 0000000..30931a2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { %foo, %foo }
+%foo = type { <4 x float> }
+
+declare arm_aapcs_vfpcc float @aaa(%foo* nocapture) nounwind readonly
+
+declare arm_aapcs_vfpcc %bar* @bbb(%bar*, <4 x float>, <4 x float>) nounwind
+
+define arm_aapcs_vfpcc void @ccc(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc %bar* @bbb(%bar* undef, <4 x float> undef, <4 x float> undef) nounwind ; <%bar*> [#uses=0]
+ %1 = call arm_aapcs_vfpcc float @aaa(%foo* undef) nounwind ; <float> [#uses=0]
+ unreachable
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 0000000..2ff479b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%struct.1 = type { %struct.4, %struct.4 }
+%struct.4 = type { <4 x float> }
+
+define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.0, <4 x float> %legalation.0) nounwind {
+entry:
+ %0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0]
+ %1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0]
+ %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0]
+ ret %struct.1* %this
+}
+
+declare arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @sss1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4*, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
new file mode 100644
index 0000000..6281775
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4926
+
+define arm_apcscc void @test_vget_lanep16() nounwind {
+entry:
+ %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_poly16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+; CHECK: fldd
+ %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_poly16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
new file mode 100644
index 0000000..ea2693a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
+; PR5055
+
+module asm ".globl\09__aeabi_f2lz"
+module asm ".set\09__aeabi_f2lz, __fixsfdi"
+module asm ""
+
+define arm_aapcs_vfpcc i64 @__fixsfdi(float %a) nounwind {
+entry:
+ %0 = fcmp olt float %a, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb1
+
+bb: ; preds = %entry
+ %1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
+ %2 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %1) nounwind ; <i64> [#uses=1]
+ %3 = sub i64 0, %2 ; <i64> [#uses=1]
+ ret i64 %3
+
+bb1: ; preds = %entry
+ %4 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %a) nounwind ; <i64> [#uses=1]
+ ret i64 %4
+}
+
+declare arm_aapcs_vfpcc i64 @__fixunssfdi(float)
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
new file mode 100644
index 0000000..53bd668
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -mcpu=arm10tdmi | FileCheck %s
+; PR4687
+
+%0 = type { double, double }
+
+define arm_aapcscc void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
+; CHECK: foo:
+; CHECK: bl __adddf3
+; CHECK-NOT: strd
+; CHECK: mov
+ %x76 = fmul double %y.0, 0.000000e+00 ; <double> [#uses=1]
+ %x77 = fadd double %y.0, 0.000000e+00 ; <double> [#uses=1]
+ %tmpr = fadd double %x.0, %x76 ; <double> [#uses=1]
+ %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
+ store double %tmpr, double* %agg.result.0, align 8
+ %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
+ store double %x77, double* %agg.result.1, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index a3832c0..9ccff07 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -stats |& grep asm-printer | grep 4
+; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
define i32 @t1(i32 %a) {
%b = mul i32 %a, 9
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 70b2c4d..b2c0314 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-linux-gnueabi -o %t -f
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
; RUN: grep set %t | count 5
; RUN: grep globl %t | count 4
; RUN: grep weak %t | count 1
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
index bb336ce..d73abe6a 100644
--- a/test/CodeGen/ARM/align.ll
+++ b/test/CodeGen/ARM/align.ll
@@ -1,9 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep align.*1 | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm | grep align.*1 | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
; RUN: grep align.*2 | count 2
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
; RUN: grep align.*3 | count 2
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
; RUN: grep align.*2 | count 4
@a = global i1 true
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
index f7e450f..15cf677 100644
--- a/test/CodeGen/ARM/alloca.ll
+++ b/test/CodeGen/ARM/alloca.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
; RUN: grep {mov r11, sp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
; RUN: grep {mov sp, r11}
define void @f(i32 %a) {
diff --git a/test/CodeGen/ARM/argaddr.ll b/test/CodeGen/ARM/argaddr.ll
index 080827d..116a32f 100644
--- a/test/CodeGen/ARM/argaddr.ll
+++ b/test/CodeGen/ARM/argaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
diff --git a/test/CodeGen/ARM/arguments-nosplit-double.ll b/test/CodeGen/ARM/arguments-nosplit-double.ll
index 57ff95c..770e41d 100644
--- a/test/CodeGen/ARM/arguments-nosplit-double.ll
+++ b/test/CodeGen/ARM/arguments-nosplit-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
; PR4059
define i32 @f(i64 %z, i32 %a, double %b) {
diff --git a/test/CodeGen/ARM/arguments-nosplit-i64.ll b/test/CodeGen/ARM/arguments-nosplit-i64.ll
index 5464674..815edfd 100644
--- a/test/CodeGen/ARM/arguments-nosplit-i64.ll
+++ b/test/CodeGen/ARM/arguments-nosplit-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
; PR4058
define i32 @f(i64 %z, i32 %a, i64 %b) {
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index 833e22d..ad5b2d6 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
; RUN: grep {mov r0, r2} | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
; RUN: grep {mov r0, r1} | count 1
define i32 @f(i32 %a, i64 %b) {
diff --git a/test/CodeGen/ARM/arguments2.ll b/test/CodeGen/ARM/arguments2.ll
index eb7e45b..a515ad7 100644
--- a/test/CodeGen/ARM/arguments2.ll
+++ b/test/CodeGen/ARM/arguments2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i32 @f(i32 %a, i128 %b) {
%tmp = call i32 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments3.ll b/test/CodeGen/ARM/arguments3.ll
index 97c0405..58f64c6 100644
--- a/test/CodeGen/ARM/arguments3.ll
+++ b/test/CodeGen/ARM/arguments3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i64 @f(i32 %a, i128 %b) {
%tmp = call i64 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments4.ll b/test/CodeGen/ARM/arguments4.ll
index 63ba64b..f5f4207 100644
--- a/test/CodeGen/ARM/arguments4.ll
+++ b/test/CodeGen/ARM/arguments4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define float @f(i32 %a, i128 %b) {
%tmp = call float @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments5.ll b/test/CodeGen/ARM/arguments5.ll
index 2000ff7..388a8eb 100644
--- a/test/CodeGen/ARM/arguments5.ll
+++ b/test/CodeGen/ARM/arguments5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define double @f(i32 %a, i128 %b) {
%tmp = call double @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments6.ll b/test/CodeGen/ARM/arguments6.ll
index a18c621..3f757fee 100644
--- a/test/CodeGen/ARM/arguments6.ll
+++ b/test/CodeGen/ARM/arguments6.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i128 @f(i32 %a, i128 %b) {
%tmp = call i128 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll
index 489ffd4..038e417 100644
--- a/test/CodeGen/ARM/arguments7.ll
+++ b/test/CodeGen/ARM/arguments7.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) {
%tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b)
diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll
index 5ff7e09..6999a4d 100644
--- a/test/CodeGen/ARM/arguments8.ll
+++ b/test/CodeGen/ARM/arguments8.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) {
%tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b)
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index 07d928a..690f488 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
define float @f(float %z, double %a, float %b) {
%tmp = call float @g(float %b)
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
index b260b13..2e35e39 100644
--- a/test/CodeGen/ARM/arm-asm.ll
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @frame_dummy() {
entry:
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
index f1e4c2a..2739860 100644
--- a/test/CodeGen/ARM/arm-frameaddr.ll
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | grep r7
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | grep r11
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | grep r7
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | grep r11
; PR4344
; PR4416
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 553c2fb..c4b4ec6 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
+; RUN: llc < %s -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
new file mode 100644
index 0000000..53392de
--- /dev/null
+++ b/test/CodeGen/ARM/bfc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | grep "bfc " | count 3
+
+; 4278190095 = 0xff00000f
+define i32 @f1(i32 %a) {
+ %tmp = and i32 %a, 4278190095
+ ret i32 %tmp
+}
+
+; 4286578688 = 0xff800000
+define i32 @f2(i32 %a) {
+ %tmp = and i32 %a, 4286578688
+ ret i32 %tmp
+}
+
+; 4095 = 0x00000fff
+define i32 @f3(i32 %a) {
+ %tmp = and i32 %a, 4095
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
index b4ea433..b16dcc6 100644
--- a/test/CodeGen/ARM/bic.ll
+++ b/test/CodeGen/ARM/bic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
+; RUN: llc < %s -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
define i32 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
index 0ac4f9a..9e94efe 100644
--- a/test/CodeGen/ARM/bits.ll
+++ b/test/CodeGen/ARM/bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep and %t | count 1
; RUN: grep orr %t | count 1
; RUN: grep eor %t | count 1
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 437b318..0e3e070 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | not grep bx
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | not grep bx
define void @test(i32 %Ptr, i8* %L) {
entry:
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 6b19665..52246c3 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {mov lr, pc}
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep blx
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: llc < %s -march=arm | grep {mov lr, pc}
+; RUN: llc < %s -march=arm -mattr=+v5t | grep blx
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
; RUN: -relocation-model=pic | grep {PLT}
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index 1af6fad..efe29d8 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: not grep {bx lr}
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index 3bf2dc0..294de5f 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | grep "subs r" | count 2
-; RUN: llvm-as < %s | llc -march=arm | grep "adc r"
-; RUN: llvm-as < %s | llc -march=arm | grep "sbc r" | count 2
+; RUN: llc < %s -march=arm | grep "subs r" | count 2
+; RUN: llc < %s -march=arm | grep "adc r"
+; RUN: llc < %s -march=arm | grep "sbc r" | count 2
define i64 @f1(i64 %a, i64 %b) {
entry:
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index 389fb2c..d2235c9 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep clz
+; RUN: llc < %s -march=arm -mattr=+v5t | grep clz
declare i32 @llvm.ctlz.i32(i32)
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index fcb8b17..5f3ed1d 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
; RUN: grep fcmpes
define void @test3(float* %glob, i32 %X) {
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 095157b5..e2d8ddc 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {mov r0, #255$} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
; RUN: grep {mov r0.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {orr.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {cmp r0, #1, 16} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {orr.*256} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
+; RUN: llc < %s -march=arm | grep {cmp r0, #1, 16} | count 1
define i32 @f1() {
ret i32 0
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 4f4091a..0dcf9dd 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll
index 5caa5b1..fb94626 100644
--- a/test/CodeGen/ARM/ctors_dtors.ll
+++ b/test/CodeGen/ARM/ctors_dtors.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep {\\.mod_init_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep {\\.mod_term_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN: grep {\\.section \\.ctors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN: grep {\\.section \\.dtors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN: grep {\\.section \\.init_array,"aw",.init_array}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN: grep {\\.section \\.fini_array,"aw",.fini_array}
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .section __DATA,__mod_term_func,mod_term_funcs
+
+; ELF: .section .ctors,"aw",%progbits
+; ELF: .section .dtors,"aw",%progbits
+
+; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .section .fini_array,"aw",%fini_array
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_init } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
@llvm.global_dtors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_fini } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 1085ec7..2f724e7 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep __divsi3 %t
; RUN: grep __udivsi3 %t
; RUN: grep __modsi3 %t
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index e0cd4e1..92e2d13 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll
index 2e9041c..dc45ce7 100644
--- a/test/CodeGen/ARM/extloadi1.ll
+++ b/test/CodeGen/ARM/extloadi1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
@handler_installed.6144.b = external global i1 ; <i1*> [#uses=1]
define void @__mf_sigusr1_respond() {
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
new file mode 100644
index 0000000..5690a01
--- /dev/null
+++ b/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %dum = fadd float %a, %b
+ %0 = tail call float @fabsf(float %dum)
+ %dum1 = fadd float %0, %b
+ ret float %dum1
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
new file mode 100644
index 0000000..a01f868
--- /dev/null
+++ b/test/CodeGen/ARM/fadds.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fadd float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 66acda9..bf7c305 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep bic | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \
+; RUN: llc < %s -march=arm | grep bic | count 2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
; RUN: grep fneg | count 2
define float @test1(float %x, double %y) {
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
new file mode 100644
index 0000000..2af250d
--- /dev/null
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fdiv float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index 777a3d6..ebf1d84 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fstd
+; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd
define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
new file mode 100644
index 0000000..1a1cd07
--- /dev/null
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fadd float %acc, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll
index 315e623..eb72faf 100644
--- a/test/CodeGen/ARM/fmdrr-fmrrd.ll
+++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmdrr
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmrrd
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd
; naive codegen for this is:
; _i:
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
new file mode 100644
index 0000000..c6e6d40
--- /dev/null
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float %0, %acc
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
new file mode 100644
index 0000000..cb5dade
--- /dev/null
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
new file mode 100644
index 0000000..7da443d
--- /dev/null
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+
+define float @test1(float* %a) {
+entry:
+ %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %1 = fsub float -0.000000e+00, %0 ; <float> [#uses=2]
+ %2 = fpext float %1 to double ; <double> [#uses=1]
+ %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
+ %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
+ ret float %retval
+}
+
+define float @test2(float* %a) {
+entry:
+ %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]
+ %2 = fpext float %1 to double ; <double> [#uses=1]
+ %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
+ %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
+ ret float %retval
+}
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
new file mode 100644
index 0000000..e57bbbb
--- /dev/null
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float %acc, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
new file mode 100644
index 0000000..3ae437d
--- /dev/null
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %acc, float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float -0.0, %0
+ %2 = fsub float %1, %acc
+ ret float %2
+}
+
+define float @test2(float %acc, float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fmul float -1.0, %0
+ %2 = fsub float %1, %acc
+ ret float %2
+}
+
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 7bbda2d..613b347 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fnmuld
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
define double @t1(double %a, double %b) {
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll
new file mode 100644
index 0000000..efd87d2
--- /dev/null
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float -0.0, %0
+ ret float %1
+}
+
+define float @test2(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fmul float -1.0, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll
index 6d6d108..4ac10ba 100644
--- a/test/CodeGen/ARM/formal.ll
+++ b/test/CodeGen/ARM/formal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
declare void @bar(i64 %x, i64 %y)
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index ba199db..4e4ef72 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -1,55 +1,71 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fmsr %t | count 4
-; RUN: grep fsitos %t
-; RUN: grep fmrs %t | count 2
-; RUN: grep fsitod %t
-; RUN: grep fmrrd %t | count 3
-; RUN: not grep fmdrr %t
-; RUN: grep fldd %t
-; RUN: grep fuitod %t
-; RUN: grep fuitos %t
-; RUN: grep 1065353216 %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define float @f(i32 %a) {
+;CHECK: f:
+;CHECK: fmsr
+;CHECK-NEXT: fsitos
+;CHECK-NEXT: fmrs
entry:
%tmp = sitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @g(i32 %a) {
+;CHECK: g:
+;CHECK: fmsr
+;CHECK-NEXT: fsitod
+;CHECK-NEXT: fmrrd
entry:
%tmp = sitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
}
define double @uint_to_double(i32 %a) {
+;CHECK: uint_to_double:
+;CHECK: fmsr
+;CHECK-NEXT: fuitod
+;CHECK-NEXT: fmrrd
entry:
%tmp = uitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
}
define float @uint_to_float(i32 %a) {
+;CHECK: uint_to_float:
+;CHECK: fmsr
+;CHECK-NEXT: fuitos
+;CHECK-NEXT: fmrs
entry:
%tmp = uitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @h(double* %v) {
+;CHECK: h:
+;CHECK: fldd
+;CHECK-NEXT: fmrrd
entry:
%tmp = load double* %v ; <double> [#uses=1]
ret double %tmp
}
define float @h2() {
+;CHECK: h2:
+;CHECK: 1065353216
entry:
ret float 1.000000e+00
}
define double @f2(double %a) {
+;CHECK: f2:
+;CHECK-NOT: fmdrr
ret double %a
}
define void @f3() {
+;CHECK: f3:
+;CHECK-NOT: fmdrr
+;CHECK: f4
entry:
%tmp = call double @f5( ) ; <double> [#uses=1]
call void @f4( double %tmp )
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
new file mode 100644
index 0000000..9ce2ac5
--- /dev/null
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
+
+define i32 @test1(float %a, float %b) {
+; VFP2: test1:
+; VFP2: ftosizs s0, s0
+; NEON: test1:
+; NEON: vcvt.s32.f32 d0, d0
+entry:
+ %0 = fadd float %a, %b
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @test2(float %a, float %b) {
+; VFP2: test2:
+; VFP2: ftouizs s0, s0
+; NEON: test2:
+; NEON: vcvt.u32.f32 d0, d0
+entry:
+ %0 = fadd float %a, %b
+ %1 = fptoui float %0 to i32
+ ret i32 %1
+}
+
+define float @test3(i32 %a, i32 %b) {
+; VFP2: test3:
+; VFP2: fuitos s0, s0
+; NEON: test3:
+; NEON: vcvt.f32.u32 d0, d0
+entry:
+ %0 = add i32 %a, %b
+ %1 = uitofp i32 %0 to float
+ ret float %1
+}
+
+define float @test4(i32 %a, i32 %b) {
+; VFP2: test4:
+; VFP2: fsitos s0, s0
+; NEON: test4:
+; NEON: vcvt.f32.s32 d0, d0
+entry:
+ %0 = add i32 %a, %b
+ %1 = sitofp i32 %0 to float
+ ret float %1
+}
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index 568a6c4..ebeeb18 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -1,74 +1,88 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fadds %t
-; RUN: grep faddd %t
-; RUN: grep fmuls %t
-; RUN: grep fmuld %t
-; RUN: grep eor %t
-; RUN: grep fnegd %t
-; RUN: grep fdivs %t
-; RUN: grep fdivd %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define float @f1(float %a, float %b) {
+;CHECK: f1:
+;CHECK: fadds
entry:
%tmp = fadd float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f2(double %a, double %b) {
+;CHECK: f2:
+;CHECK: faddd
entry:
%tmp = fadd double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f3(float %a, float %b) {
+;CHECK: f3:
+;CHECK: fmuls
entry:
%tmp = fmul float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f4(double %a, double %b) {
+;CHECK: f4:
+;CHECK: fmuld
entry:
%tmp = fmul double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f5(float %a, float %b) {
+;CHECK: f5:
+;CHECK: fsubs
entry:
%tmp = fsub float %a, %b ; <float> [#uses=1]
ret float %tmp
}
define double @f6(double %a, double %b) {
+;CHECK: f6:
+;CHECK: fsubd
entry:
%tmp = fsub double %a, %b ; <double> [#uses=1]
ret double %tmp
}
define float @f7(float %a) {
+;CHECK: f7:
+;CHECK: eor
entry:
%tmp1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
ret float %tmp1
}
define double @f8(double %a) {
+;CHECK: f8:
+;CHECK: fnegd
entry:
%tmp1 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
ret double %tmp1
}
define float @f9(float %a, float %b) {
+;CHECK: f9:
+;CHECK: fdivs
entry:
%tmp1 = fdiv float %a, %b ; <float> [#uses=1]
ret float %tmp1
}
define double @f10(double %a, double %b) {
+;CHECK: f10:
+;CHECK: fdivd
entry:
%tmp1 = fdiv double %a, %b ; <double> [#uses=1]
ret double %tmp1
}
define float @f11(float %a) {
+;CHECK: f11:
+;CHECK: bic
entry:
%tmp1 = call float @fabsf( float %a ) ; <float> [#uses=1]
ret float %tmp1
@@ -77,6 +91,8 @@ entry:
declare float @fabsf(float)
define double @f12(double %a) {
+;CHECK: f12:
+;CHECK: fabsd
entry:
%tmp1 = call double @fabs( double %a ) ; <double> [#uses=1]
ret double %tmp1
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index ce0f4029..2c9591c 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep movmi %t
-; RUN: grep moveq %t
-; RUN: grep movgt %t
-; RUN: grep movge %t
-; RUN: grep movne %t
-; RUN: grep fcmped %t | count 1
-; RUN: grep fcmpes %t | count 6
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define i32 @f1(float %a) {
+;CHECK: f1:
+;CHECK: fcmpes
+;CHECK: movmi
entry:
%tmp = fcmp olt float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp1 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -15,6 +11,9 @@ entry:
}
define i32 @f2(float %a) {
+;CHECK: f2:
+;CHECK: fcmpes
+;CHECK: moveq
entry:
%tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp2 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -22,6 +21,9 @@ entry:
}
define i32 @f3(float %a) {
+;CHECK: f3:
+;CHECK: fcmpes
+;CHECK: movgt
entry:
%tmp = fcmp ogt float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp3 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -29,6 +31,9 @@ entry:
}
define i32 @f4(float %a) {
+;CHECK: f4:
+;CHECK: fcmpes
+;CHECK: movge
entry:
%tmp = fcmp oge float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp4 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -36,6 +41,9 @@ entry:
}
define i32 @f5(float %a) {
+;CHECK: f5:
+;CHECK: fcmpes
+;CHECK: movls
entry:
%tmp = fcmp ole float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp5 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -43,6 +51,9 @@ entry:
}
define i32 @f6(float %a) {
+;CHECK: f6:
+;CHECK: fcmpes
+;CHECK: movne
entry:
%tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp6 = zext i1 %tmp to i32 ; <i32> [#uses=1]
@@ -50,6 +61,9 @@ entry:
}
define i32 @g1(double %a) {
+;CHECK: g1:
+;CHECK: fcmped
+;CHECK: movmi
entry:
%tmp = fcmp olt double %a, 1.000000e+00 ; <i1> [#uses=1]
%tmp7 = zext i1 %tmp to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 3e749af..67f70e9 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep movvs
+; RUN: llc < %s -march=arm | grep moveq
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs
define i32 @f7(float %a, float %b) {
entry:
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index 218b25f..ee3c338 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -1,81 +1,101 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fcvtsd %t
-; RUN: grep fcvtds %t
-; RUN: grep ftosizs %t
-; RUN: grep ftouizs %t
-; RUN: grep ftosizd %t
-; RUN: grep ftouizd %t
-; RUN: grep fsitos %t
-; RUN: grep fsitod %t
-; RUN: grep fuitos %t
-; RUN: grep fuitod %t
-; RUN: llvm-as < %s | llc -march=arm > %t
-; RUN: grep truncdfsf2 %t
-; RUN: grep extendsfdf2 %t
-; RUN: grep fixsfsi %t
-; RUN: grep fixunssfsi %t
-; RUN: grep fixdfsi %t
-; RUN: grep fixunsdfsi %t
-; RUN: grep floatsisf %t
-; RUN: grep floatsidf %t
-; RUN: grep floatunsisf %t
-; RUN: grep floatunsidf %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -march=arm | FileCheck %s
define float @f1(double %x) {
+;CHECK-VFP: f1:
+;CHECK-VFP: fcvtsd
+;CHECK: f1:
+;CHECK: truncdfsf2
entry:
%tmp1 = fptrunc double %x to float ; <float> [#uses=1]
ret float %tmp1
}
define double @f2(float %x) {
+;CHECK-VFP: f2:
+;CHECK-VFP: fcvtds
+;CHECK: f2:
+;CHECK: extendsfdf2
entry:
%tmp1 = fpext float %x to double ; <double> [#uses=1]
ret double %tmp1
}
define i32 @f3(float %x) {
+;CHECK-VFP: f3:
+;CHECK-VFP: ftosizs
+;CHECK: f3:
+;CHECK: fixsfsi
entry:
%tmp = fptosi float %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define i32 @f4(float %x) {
+;CHECK-VFP: f4:
+;CHECK-VFP: ftouizs
+;CHECK: f4:
+;CHECK: fixunssfsi
entry:
%tmp = fptoui float %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define i32 @f5(double %x) {
+;CHECK-VFP: f5:
+;CHECK-VFP: ftosizd
+;CHECK: f5:
+;CHECK: fixdfsi
entry:
%tmp = fptosi double %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define i32 @f6(double %x) {
+;CHECK-VFP: f6:
+;CHECK-VFP: ftouizd
+;CHECK: f6:
+;CHECK: fixunsdfsi
entry:
%tmp = fptoui double %x to i32 ; <i32> [#uses=1]
ret i32 %tmp
}
define float @f7(i32 %a) {
+;CHECK-VFP: f7:
+;CHECK-VFP: fsitos
+;CHECK: f7:
+;CHECK: floatsisf
entry:
%tmp = sitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @f8(i32 %a) {
+;CHECK-VFP: f8:
+;CHECK-VFP: fsitod
+;CHECK: f8:
+;CHECK: floatsidf
entry:
%tmp = sitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
}
define float @f9(i32 %a) {
+;CHECK-VFP: f9:
+;CHECK-VFP: fuitos
+;CHECK: f9:
+;CHECK: floatunsisf
entry:
%tmp = uitofp i32 %a to float ; <float> [#uses=1]
ret float %tmp
}
define double @f10(i32 %a) {
+;CHECK-VFP: f10:
+;CHECK-VFP: fuitod
+;CHECK: f10:
+;CHECK: floatunsidf
entry:
%tmp = uitofp i32 %a to double ; <double> [#uses=1]
ret double %tmp
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 13653bb..fa897bf 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep {flds.*\\\[} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep {fsts.*\\\[} | count 1
define float @f1(float %a) {
diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll
index 461a2c9..6d48792 100644
--- a/test/CodeGen/ARM/fpow.ll
+++ b/test/CodeGen/ARM/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define double @t(double %x, double %y) nounwind optsize {
entry:
diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll
index ab09fff..174106b 100644
--- a/test/CodeGen/ARM/fpowi.ll
+++ b/test/CodeGen/ARM/fpowi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep powidf2
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep powidf2
; PR1287
; ModuleID = '<stdin>'
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 41168ac..0d270b0 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
@i = weak global i32 0 ; <i32*> [#uses=2]
@u = weak global i32 0 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
new file mode 100644
index 0000000..060dd46
--- /dev/null
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fsub float %a, %b
+ ret float %0
+}
+
diff --git a/test/CodeGen/ARM/hardfloat_neon.ll b/test/CodeGen/ARM/hardfloat_neon.ll
new file mode 100644
index 0000000..4abf04b
--- /dev/null
+++ b/test/CodeGen/ARM/hardfloat_neon.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard
+
+define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind {
+ %tmp1 = mul <16 x i8> %A, %B
+ ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) {
+ %tmp = call <16 x i8> @g(<16 x i8> %b)
+ ret <16 x i8> %tmp
+}
+
+declare <16 x i8> @g(<16 x i8>)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 16231da..ccdc7bf 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu --disable-fp-elim | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
; RUN: grep mov | count 3
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll
index 6cf69aa..90f5308 100644
--- a/test/CodeGen/ARM/hidden-vis-2.ll
+++ b/test/CodeGen/ARM/hidden-vis-2.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
@x = weak hidden global i32 0 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; CHECK: t:
+; CHECK: ldr
+; CHECK-NEXT: ldr
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll
index 4477f2a..3bd710a 100644
--- a/test/CodeGen/ARM/hidden-vis-3.ll
+++ b/test/CodeGen/ARM/hidden-vis-3.ll
@@ -1,12 +1,15 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 6
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep long | count 4
+; RUN: llc < %s -mtriple=arm-apple-darwin9 | FileCheck %s
@x = external hidden global i32 ; <i32*> [#uses=1]
@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; CHECK: LCPI1_0:
+; CHECK-NEXT: .long _x
+; CHECK: LCPI1_1:
+; CHECK-NEXT: .long _y
+
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
%1 = load i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/hidden-vis.ll b/test/CodeGen/ARM/hidden-vis.ll
index 93f81ec..3544ae8 100644
--- a/test/CodeGen/ARM/hidden-vis.ll
+++ b/test/CodeGen/ARM/hidden-vis.ll
@@ -1,18 +1,23 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep .private_extern | count 2
+; RUN: llc < %s -mtriple=arm-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
-%struct.Person = type { i32 }
@a = hidden global i32 0
@b = external global i32
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
-define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) {
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
ret void
}
-declare void @function(i32)
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
-define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) {
+; DARWIN: t2:
+; DARWIN: .private_extern _a
ret void
}
-
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index ede6d74..1054f27 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -stats |& \
+; RUN: llc < %s -march=arm -stats |& \
; RUN: grep {3 .*Number of machine instrs printed}
;; Integer absolute value, should produce something as good as: ARM:
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index 7d42955..e6aa044 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep bx | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bx | count 1
define i32 @t1(i32 %a, i32 %b) {
%tmp2 = icmp eq i32 %a, 0
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index 3942061..ce57d73 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep bxlt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bxgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bxge | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bxlt | count 1
+; RUN: llc < %s -march=arm | grep bxgt | count 1
+; RUN: llc < %s -march=arm | grep bxge | count 1
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
%tmp2 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 620bcbe..f7ebac6 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep cmpne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bx | count 2
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep cmpne | count 1
+; RUN: llc < %s -march=arm | grep bx | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
switch i32 %c, label %cond_next [
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
index ce5a679..f28c61b 100644
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep subgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep suble | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep subgt | count 1
+; RUN: llc < %s -march=arm | grep suble | count 1
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
define i32 @t(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index f8d4f82..e9145ac 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep blge | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
@x = external global i32* ; <i32**> [#uses=1]
@@ -11,6 +10,8 @@ entry:
}
define void @t1(i32 %a, i32 %b) {
+; CHECK: t1:
+; CHECK: ldmltfd sp!, {r7, pc}
entry:
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 63c4a08..5824115 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,10 +1,6 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep cmpne | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep ldmhi | count 1
define void @foo(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index 6bb4b56..f9cf88f 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep cmpeq | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep moveq | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep ldmeq | count 1
; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 85bd8c7..6cb8e7b 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,7 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
; RUN: grep ldmne | count 1
%struct.SString = type { i8*, i32, i32 }
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
index bbd2f2e..05bdc45 100644
--- a/test/CodeGen/ARM/ifcvt9.ll
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define fastcc void @t() nounwind {
entry:
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index ad24eb5..febe6f5 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux
define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
{
diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll
index 998adba..6f25f9d 100644
--- a/test/CodeGen/ARM/imm.ll
+++ b/test/CodeGen/ARM/imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep CPI
+; RUN: llc < %s -march=arm | not grep CPI
define i32 @test1(i32 %A) {
%B = add i32 %A, -268435441 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll
index 2ceceae..45dfcf0 100644
--- a/test/CodeGen/ARM/inlineasm-imm-arm.ll
+++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
; Test ARM-mode "I" constraint, for any Data Processing immediate.
define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll
index 2f7332a..d522348 100644
--- a/test/CodeGen/ARM/inlineasm.ll
+++ b/test/CodeGen/ARM/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
define i32 @test1(i32 %tmp54) {
%tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 ) ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll
index 69394eb..a99bccf 100644
--- a/test/CodeGen/ARM/inlineasm2.ll
+++ b/test/CodeGen/ARM/inlineasm2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @__ieee754_sqrt(double %x) {
%tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x )
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index f203443..59f0d53 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
; RUN: grep mov | count 3
define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 7e8eb42..5116ac8 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {mov r0, r0, lsr #31}
+; RUN: llc < %s -march=arm | grep {mov r0, r0, lsr #31}
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
index b1738a4..ddf0f0e 100644
--- a/test/CodeGen/ARM/large-stack.ll
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @test1() {
%tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 6a05457..774b3c0 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep ldmia | count 2
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep ldmib | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
; RUN: grep {ldmfd sp\!} | count 3
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index ea99655..954fb5b 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {ldr r0} | count 7
-; RUN: llvm-as < %s | llc -march=arm | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=arm | not grep mvn
-; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsr
+; RUN: llc < %s -march=arm | grep {ldr r0} | count 7
+; RUN: llc < %s -march=arm | grep mov | grep 1
+; RUN: llc < %s -march=arm | not grep mvn
+; RUN: llc < %s -march=arm | grep ldr | grep lsl
+; RUN: llc < %s -march=arm | grep ldr | grep lsr
define i32 @f1(i32* %v) {
entry:
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
index b99c721..d29eb02 100644
--- a/test/CodeGen/ARM/ldr_ext.ll
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -1,27 +1,36 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsh | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
-define i32 @test1(i8* %v.pntr.s0.u1) {
- %tmp.u = load i8* %v.pntr.s0.u1
+define i32 @test1(i8* %t1) nounwind {
+; CHECK: ldrb
+ %tmp.u = load i8* %t1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
-define i32 @test2(i16* %v.pntr.s0.u1) {
- %tmp.u = load i16* %v.pntr.s0.u1
+define i32 @test2(i16* %t1) nounwind {
+; CHECK: ldrh
+ %tmp.u = load i16* %t1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
-define i32 @test3(i8* %v.pntr.s1.u0) {
- %tmp.s = load i8* %v.pntr.s1.u0
+define i32 @test3(i8* %t0) nounwind {
+; CHECK: ldrsb
+ %tmp.s = load i8* %t0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
-define i32 @test4() {
+define i32 @test4(i16* %t0) nounwind {
+; CHECK: ldrsh
+ %tmp.s = load i16* %t0
+ %tmp1.s = sext i16 %tmp.s to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; CHECK: mov r0, #0
+; CHECK: ldrsh
%tmp.s = load i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index 4431506..a3abdb6 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep mov
define i32 @f1() {
%buf = alloca [32 x i32], align 4
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index 0491563..97a48e1 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {ldr.*\\\[.*\],} | count 1
define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index 7e44742..7c44284 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {ldr.*\\!} | count 2
define i32* @test1(i32* %X, i32* %dest) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index f1bee05..8f7ae55 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,12 +1,20 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | grep ldrd
-; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin | not grep ldrd
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi | not grep ldrd
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
; rdar://r6949835
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
+;V6: ldrd r2, [r2]
+
+;V5: ldr r3, [r2]
+;V5-NEXT: ldr r2, [r2, #+4]
+
+;EABI: ldr r3, [r2]
+;EABI-NEXT: ldr r2, [r2, #+4]
+
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
index 8896ead..56a4a47 100644
--- a/test/CodeGen/ARM/load-global.ll
+++ b/test/CodeGen/ARM/load-global.ll
@@ -1,14 +1,10 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-apple-darwin -relocation-model=static | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | \
; RUN: not grep {L_G\$non_lazy_ptr}
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \
; RUN: grep {L_G\$non_lazy_ptr} | count 2
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
; RUN: grep {ldr.*pc} | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=arm-linux-gnueabi -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | \
; RUN: grep {GOT} | count 1
@G = external global i32
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
index 0509732..253b0e1 100644
--- a/test/CodeGen/ARM/load.ll
+++ b/test/CodeGen/ARM/load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep ldrsb %t
; RUN: grep ldrb %t
; RUN: grep ldrsh %t
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
index 4bab330..c76a5e4 100644
--- a/test/CodeGen/ARM/long-setcc.ll
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep cmp | count 1
+; RUN: llc < %s -march=arm | grep cmp | count 1
define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index fe0ee54..2fcaac0 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
; RUN: grep -- {-2147483648} | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep adds | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep adc | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {subs } | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | grep mvn | count 3
+; RUN: llc < %s -march=arm | grep adds | count 1
+; RUN: llc < %s -march=arm | grep adc | count 1
+; RUN: llc < %s -march=arm | grep {subs } | count 1
+; RUN: llc < %s -march=arm | grep sbc | count 1
+; RUN: llc < %s -march=arm | \
; RUN: grep smull | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep umull | count 1
define i64 @f1() {
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 55d0cdc..057b5f0 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
; RUN: grep rrx %t | count 1
; RUN: grep __ashldi3 %t
; RUN: grep __ashrdi3 %t
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 3881e91..507ec2c 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -stats |& grep {39.*Number of machine instrs printed}
-; RUN: llvm-as < %s | llc -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
; the constant and do a store. We do *not* want something like this:
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 02902f2..8130019 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep lsl | grep -F {lsl #2\]}
+; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
; Should use scaled addressing mode.
define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll
index e983165..f46c7a5 100644
--- a/test/CodeGen/ARM/mem.ll
+++ b/test/CodeGen/ARM/mem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep strb
-; RUN: llvm-as < %s | llc -march=arm | grep strh
+; RUN: llc < %s -march=arm | grep strb
+; RUN: llc < %s -march=arm | grep strh
define void @f1() {
entry:
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 4bf0b4f..ed20c32 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldmia
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep stmia
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrb
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrh
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 0b58bf6..41d5944 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @f() {
entry:
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
new file mode 100644
index 0000000..85407fa
--- /dev/null
+++ b/test/CodeGen/ARM/mls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b
+ %tmp2 = sub i32 %c, %tmp1
+ ret i32 %tmp2
+}
+
+; sub doesn't commute, so no mls for this one
+define i32 @f2(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b
+ %tmp2 = sub i32 %tmp1, %c
+ ret i32 %tmp2
+}
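
The comment in f2 is the key point here: ARM's MLS instruction computes Ra - Rn*Rm, so it can absorb the subtraction in f1 (c - a*b) but not in f2 (a*b - c). As an illustrative sketch of the expected +v6t2 lowering only (not part of this change; the concrete register assignments below are assumed, the test's grep pattern only requires four r-registers):

@ f1: c - a*b folds into one multiply-subtract
mls   r0, r0, r1, r2        @ r0 = r2 - r0*r1 = c - a*b
bx    lr
@ f2: a*b - c has no single-instruction form; expect mul + sub
mul   r3, r0, r1            @ r3 = a*b
sub   r0, r3, r2            @ r0 = a*b - c
bx    lr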
diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll
index 3543b5d..466a802 100644
--- a/test/CodeGen/ARM/mul.ll
+++ b/test/CodeGen/ARM/mul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mul | count 2
-; RUN: llvm-as < %s | llc -march=arm | grep lsl | count 2
+; RUN: llc < %s -march=arm | grep mul | count 2
+; RUN: llc < %s -march=arm | grep lsl | count 2
define i32 @f1(i32 %u) {
%tmp = mul i32 %u, %u
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
new file mode 100644
index 0000000..93188cd
--- /dev/null
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 9
+ ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 7
+ ret i32 %0
+}
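
The CHECK lines above rely on a standard strength-reduction identity; as a short worked note (illustrative only, spelled in the ARM shifted-operand forms the CHECK patterns name):

@ t1: 9*v = v + 8*v
add   r0, r0, r0, lsl #3    @ r0 = r0 + (r0 << 3)
@ t2: 7*v = 8*v - v, via reverse subtract (rsb Rd, Rn, Op2 computes Op2 - Rn)
rsb   r0, r0, r0, lsl #3    @ r0 = (r0 << 3) - r0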
diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll
index de75e96..148f291 100644
--- a/test/CodeGen/ARM/mulhi.ll
+++ b/test/CodeGen/ARM/mulhi.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep smmul | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep umull | count 1
+; RUN: llc < %s -march=arm | grep umull | count 1
define i32 @smulhi(i32 %x, i32 %y) {
%tmp = sext i32 %x to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
index a7ef907..571c21a 100644
--- a/test/CodeGen/ARM/mvn.ll
+++ b/test/CodeGen/ARM/mvn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 8
+; RUN: llc < %s -march=arm | grep mvn | count 8
define i32 @f1() {
entry:
diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll
index 18b516f..5892737 100644
--- a/test/CodeGen/ARM/neon_arith1.ll
+++ b/test/CodeGen/ARM/neon_arith1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vadd
+; RUN: llc < %s -march=arm -mattr=+neon | grep vadd
define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 8901ba1..2796dec 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fldd | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fstd
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep fstd
+; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd
define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index a26904a..547bab7 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd | count 2
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 151beac..1e2e7aa 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep pkhbt | count 5
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep pkhtb | count 4
define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/pr3502.ll b/test/CodeGen/ARM/pr3502.ll
index dee3fc4..606d969 100644
--- a/test/CodeGen/ARM/pr3502.ll
+++ b/test/CodeGen/ARM/pr3502.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-none-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi
;pr3502
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index e5eeccb..03376a4 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi > %t
+; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
; RUN: grep .Lfoo: %t
; RUN: egrep bl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
index 454d36b..ba9699e 100644
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2
+; RUN: llc < %s -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 4
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.LOCBOX = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll
index 792b169..5c312eb 100644
--- a/test/CodeGen/ARM/ret0.ll
+++ b/test/CodeGen/ARM/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test() {
ret i32 0
diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll
index 48a1fda..1ab947b 100644
--- a/test/CodeGen/ARM/ret_arg1.ll
+++ b/test/CodeGen/ARM/ret_arg1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1) {
ret i32 %a1
diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll
index a74870f..84477d0 100644
--- a/test/CodeGen/ARM/ret_arg2.ll
+++ b/test/CodeGen/ARM/ret_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2) {
ret i32 %a2
diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll
index 9210e7b..f7f9057 100644
--- a/test/CodeGen/ARM/ret_arg3.ll
+++ b/test/CodeGen/ARM/ret_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2, i32 %a3) {
ret i32 %a3
}
diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll
index a9c66e9..f7b3e4a 100644
--- a/test/CodeGen/ARM/ret_arg4.ll
+++ b/test/CodeGen/ARM/ret_arg4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
ret i32 %a4
diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll
index 620a017..c4f9fb5 100644
--- a/test/CodeGen/ARM/ret_arg5.ll
+++ b/test/CodeGen/ARM/ret_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
ret i32 %a5
diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll
index 287d92b..2bafea6 100644
--- a/test/CodeGen/ARM/ret_f32_arg2.ll
+++ b/test/CodeGen/ARM/ret_f32_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define float @test_f32(float %a1, float %a2) {
ret float %a2
diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll
index 3418be9..c6ce60e 100644
--- a/test/CodeGen/ARM/ret_f32_arg5.ll
+++ b/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) {
ret float %a5
diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll
index 66848d5..386e85f 100644
--- a/test/CodeGen/ARM/ret_f64_arg2.ll
+++ b/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @test_f64(double %a1, double %a2) {
ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
index 626ee6f..bdb0a60 100644
--- a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mcpu=arm8 -mattr=+vfp2
+; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2
define double @test_double_arg_reg_split(i32 %a1, double %a2) {
ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll
index b03b604..4f841a3 100644
--- a/test/CodeGen/ARM/ret_f64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) {
ret double %a3
diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll
index ba3ec7f..2144317 100644
--- a/test/CodeGen/ARM/ret_f64_arg_stack.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) {
ret double %a4
diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll
index 0fe98e6..908c34f 100644
--- a/test/CodeGen/ARM/ret_i128_arg2.ll
+++ b/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) {
ret i128 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
index b015a96..b1a1024 100644
--- a/test/CodeGen/ARM/ret_i64_arg2.ll
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i64 @test_i64(i64 %a1, i64 %a2) {
ret i64 %a2
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
index 5dfecca..ffc1d2f 100644
--- a/test/CodeGen/ARM/ret_i64_arg3.ll
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll
index 5bd5cb2..956bce5 100644
--- a/test/CodeGen/ARM/ret_i64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) {
ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll
index 68db8c4..2b7ae05 100644
--- a/test/CodeGen/ARM/ret_void.ll
+++ b/test/CodeGen/ARM/ret_void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
define void @test() {
ret void
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 68f6264..1c12268 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep rev16
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep revsh
+; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
+; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh
define i32 @test1(i32 %X) {
%tmp1 = lshr i32 %X, 8 ; <i32> [#uses=3]
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
new file mode 100644
index 0000000..923f52a
--- /dev/null
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+define i32 @f1(i32 %a) {
+entry:
+; CHECK: f1:
+; CHECK: sbfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = ashr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a) {
+entry:
+; CHECK: f2:
+; CHECK: ubfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = lshr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f3(i32 %a) {
+entry:
+; CHECK: f3:
+; CHECK: sbfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = ashr i32 %tmp, 29
+ ret i32 %tmp2
+}
+
+define i32 @f4(i32 %a) {
+entry:
+; CHECK: f4:
+; CHECK: ubfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = lshr i32 %tmp, 29
+ ret i32 %tmp2
+}
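
A short worked note on where those field positions come from (illustrative only, not part of the change): shifting left by k and then right by m (arithmetic for a signed field, logical for an unsigned one) isolates a field of width 32-m at bit m-k, which is exactly what SBFX/UBFX encode:

@ f1/f2: (a << 12) >> 12  ->  lsb = 12-12 = 0, width = 32-12 = 20
sbfx  r0, r0, #0, #20       @ ashr: signed extract
ubfx  r0, r0, #0, #20       @ lshr: unsigned extract
@ f3/f4: (a << 24) >> 29  ->  lsb = 29-24 = 5, width = 32-29 = 3
sbfx  r0, r0, #5, #3
ubfx  r0, r0, #5, #3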
diff --git a/test/CodeGen/ARM/section.ll b/test/CodeGen/ARM/section.ll
index aa65845..7a566d4 100644
--- a/test/CodeGen/ARM/section.ll
+++ b/test/CodeGen/ARM/section.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
; RUN: grep {__DTOR_END__:}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
; RUN: grep {\\.section.\\.dtors,"aw",.progbits}
@__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors" ; <[1 x i32]*> [#uses=0]
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 5148a5b..85c8b5b 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movlt | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep movle | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movls | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movhi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcpydmi | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
define i32 @f1(i32 %a.s) {
+;CHECK: f1:
+;CHECK: moveq
entry:
%tmp = icmp eq i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -15,6 +11,8 @@ entry:
}
define i32 @f2(i32 %a.s) {
+;CHECK: f2:
+;CHECK: movgt
entry:
%tmp = icmp sgt i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -22,6 +20,8 @@ entry:
}
define i32 @f3(i32 %a.s, i32 %b.s) {
+;CHECK: f3:
+;CHECK: movlt
entry:
%tmp = icmp slt i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -29,6 +29,8 @@ entry:
}
define i32 @f4(i32 %a.s, i32 %b.s) {
+;CHECK: f4:
+;CHECK: movle
entry:
%tmp = icmp sle i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -36,6 +38,8 @@ entry:
}
define i32 @f5(i32 %a.u, i32 %b.u) {
+;CHECK: f5:
+;CHECK: movls
entry:
%tmp = icmp ule i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -43,6 +47,8 @@ entry:
}
define i32 @f6(i32 %a.u, i32 %b.u) {
+;CHECK: f6:
+;CHECK: movhi
entry:
%tmp = icmp ugt i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -50,6 +56,11 @@ entry:
}
define double @f7(double %a, double %b) {
+;CHECK: f7:
+;CHECK: movlt
+;CHECK: movlt
+;CHECK-VFP: f7:
+;CHECK-VFP: fcpydmi
%tmp = fcmp olt double %a, 1.234e+00
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 6855e32..7fd91ce 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mov | count 2
+; RUN: llc < %s -march=arm | grep mov | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
%tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index cae1c44..2bbe9fd 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep add | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep bic | grep asr
+; RUN: llc < %s -march=arm | grep add | grep lsl
+; RUN: llc < %s -march=arm | grep bic | grep asr
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index 7a4e488..b7ab2e7 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -mattr=+v5TE
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
; RUN: grep smulbt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
; RUN: grep smultt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
; RUN: grep smlabt | count 1
@x = weak global i16 0 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
new file mode 100644
index 0000000..f4b27a7
--- /dev/null
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
+entry:
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 6.300000e+01, float* undef, align 4
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 0.000000e+00, float* undef, align 4
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb193, %entry
+ %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+ %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+ %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
+ %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
+ %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
+ %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
+ %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
+ %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+ br i1 undef, label %bb193, label %bb186
+
+bb186: ; preds = %bb4
+ br label %bb193
+
+bb193: ; preds = %bb186, %bb4
+ %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+ %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb4
+}
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index c3dd65a..1dd57dd 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep add | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep add | count 1
define void @f1() {
%c = alloca i8, align 1
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index ed5e4c5..22a7ecb 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index ba81380..801b9ce 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {str .*\\\[.*\],} | count 1
define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index e9f1945..f8d3df2 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
@b = external global i64*
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index c02663f..e56e3f2 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep {str.*\\!} | count 2
define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll
index 77c66ec..2f1166b6 100644
--- a/test/CodeGen/ARM/str_trunc.ll
+++ b/test/CodeGen/ARM/str_trunc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep strb | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
; RUN: grep strh | count 1
define void @test1(i32 %v, i16* %ptr) {
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index e9f302c..4752f17 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
; RUN: grep sxtab | count 1
define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
new file mode 100644
index 0000000..848a4df
--- /dev/null
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+
+define i32 @f6(i32 %a) {
+; CHECK:f6
+; CHECK: movw r0, #:lower16:65537123
+; CHECK: movt r0, #:upper16:65537123
+ %tmp = add i32 0, 65537123
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 6476b48..3143387 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {__aeabi_read_tp}
define i8* @test() {
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 6866a42..1087094 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
; RUN: -relocation-model=pic | grep {__tls_get_addr}
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 90e3bcf..32847208 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
; RUN: -relocation-model=pic | grep {__tls_get_addr}
@i = external thread_local global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df2913b..df7a4ca 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep {tbss}
%struct.anon = type { i32, i32 }
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
index 6111ec9..3033c2b 100644
--- a/test/CodeGen/ARM/trunc_ldr.ll
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb.*7 | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1
%struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** }
%struct.B = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 0e85fb6..2da08b6 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep orr
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep orr
+; RUN: llc < %s -march=arm | not grep mov
define void @bar(i8* %P, i16* %Q) {
entry:
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
index bdeee3f..c83111e 100644
--- a/test/CodeGen/ARM/tst_teq.ll
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep tst
-; RUN: llvm-as < %s | llc -march=arm | grep teq
+; RUN: llc < %s -march=arm | grep tst
+; RUN: llc < %s -march=arm | grep teq
define i32 @f(i32 %a) {
entry:
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
index 055c3c3..32eb225 100644
--- a/test/CodeGen/ARM/uint64tof64.ll
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index dad1897..fcaa2b3 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,16 +1,31 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -o %t -f
-; RUN: grep ldrb %t | count 4
-; RUN: grep strb %t | count 4
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=V7
+; rdar://7113725
- %struct.p = type <{ i8, i32 }>
-@t = global %struct.p <{ i8 1, i32 10 }> ; <%struct.p*> [#uses=1]
-@u = weak global %struct.p zeroinitializer ; <%struct.p*> [#uses=1]
-
-define i32 @main() {
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
- %tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1 ; <i32> [#uses=2]
- store i32 %tmp3, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
- ret i32 %tmp3
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+; V7: t:
+; V7: ldr r1
+; V7: str r1
+ %__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1]
+ %__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1]
+ %tmp.i = load i32* %__src1.i, align 1 ; <i32> [#uses=1]
+ store i32 %tmp.i, i32* %__dest2.i, align 1
+ ret void
}
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
index 149afc4..bd28034 100644
--- a/test/CodeGen/ARM/unord.ll
+++ b/test/CodeGen/ARM/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep movne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
+; RUN: llc < %s -march=arm | grep movne | count 1
+; RUN: llc < %s -march=arm | grep moveq | count 1
define i32 @f1(float %X, float %Y) {
%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
index 09c74eb..6307795 100644
--- a/test/CodeGen/ARM/uxt_rot.ll
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxth | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1
define i8 @test1(i32 %A.u) zeroext {
%B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/ARM/uxtb.ll b/test/CodeGen/ARM/uxtb.ll
index 73e918b..9d6e4bd 100644
--- a/test/CodeGen/ARM/uxtb.ll
+++ b/test/CodeGen/ARM/uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | \
+; RUN: llc < %s -mtriple=armv6-apple-darwin | \
; RUN: grep uxt | count 10
define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 98ee1e1..e2dca46 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vaba\\.s8} %t | count 2
-; RUN: grep {vaba\\.s16} %t | count 2
-; RUN: grep {vaba\\.s32} %t | count 2
-; RUN: grep {vaba\\.u8} %t | count 2
-; RUN: grep {vaba\\.u16} %t | count 2
-; RUN: grep {vaba\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabas8:
+;CHECK: vaba.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -15,6 +11,8 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
}
define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabas16:
+;CHECK: vaba.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -23,6 +21,8 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabas32:
+;CHECK: vaba.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -31,6 +31,8 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabau8:
+;CHECK: vaba.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -39,6 +41,8 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
}
define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabau16:
+;CHECK: vaba.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -47,6 +51,8 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabau32:
+;CHECK: vaba.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -55,6 +61,8 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQs8:
+;CHECK: vaba.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -63,6 +71,8 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
}
define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQs16:
+;CHECK: vaba.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -71,6 +81,8 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQs32:
+;CHECK: vaba.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -79,6 +91,8 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
}
define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQu8:
+;CHECK: vaba.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -87,6 +101,8 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
}
define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQu16:
+;CHECK: vaba.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -95,6 +111,8 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQu32:
+;CHECK: vaba.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -117,3 +135,71 @@ declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) no
declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabals8:
+;CHECK: vabal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabals16:
+;CHECK: vabal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabals32:
+;CHECK: vabal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabalu8:
+;CHECK: vabal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabalu16:
+;CHECK: vabal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabalu32:
+;CHECK: vabal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 0fe5ddb..2b45393 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabd\\.s8} %t | count 2
-; RUN: grep {vabd\\.s16} %t | count 2
-; RUN: grep {vabd\\.s32} %t | count 2
-; RUN: grep {vabd\\.u8} %t | count 2
-; RUN: grep {vabd\\.u16} %t | count 2
-; RUN: grep {vabd\\.u32} %t | count 2
-; RUN: grep {vabd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabds8:
+;CHECK: vabd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -15,6 +10,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabds16:
+;CHECK: vabd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -22,6 +19,8 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabds32:
+;CHECK: vabd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -29,6 +28,8 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdu8:
+;CHECK: vabd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -36,6 +37,8 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdu16:
+;CHECK: vabd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -43,6 +46,8 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdu32:
+;CHECK: vabd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -50,13 +55,17 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vabdf32:
+;CHECK: vabd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQs8:
+;CHECK: vabd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -64,6 +73,8 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQs16:
+;CHECK: vabd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -71,6 +82,8 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQs32:
+;CHECK: vabd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -78,6 +91,8 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQu8:
+;CHECK: vabd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -85,6 +100,8 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQu16:
+;CHECK: vabd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -92,6 +109,8 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQu32:
+;CHECK: vabd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -99,9 +118,11 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vabdQf32:
+;CHECK: vabd.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -113,7 +134,7 @@ declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +144,66 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdls8:
+;CHECK: vabdl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdls16:
+;CHECK: vabdl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdls32:
+;CHECK: vabdl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdlu8:
+;CHECK: vabdl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdlu16:
+;CHECK: vabdl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdlu32:
+;CHECK: vabdl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 629baa7..18ba61f 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -1,64 +1,131 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabs\\.s8} %t | count 2
-; RUN: grep {vabs\\.s16} %t | count 2
-; RUN: grep {vabs\\.s32} %t | count 2
-; RUN: grep {vabs\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
+;CHECK: vabss8:
+;CHECK: vabs.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
+;CHECK: vabss16:
+;CHECK: vabs.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
+;CHECK: vabss32:
+;CHECK: vabs.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
+;CHECK: vabsf32:
+;CHECK: vabs.f32
%tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+ %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp2
}
define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vabsQs8:
+;CHECK: vabs.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vabsQs16:
+;CHECK: vabs.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vabsQs32:
+;CHECK: vabs.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
+;CHECK: vabsQf32:
+;CHECK: vabs.f32
%tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+ %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp2
}
declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
+define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
+;CHECK: vqabss8:
+;CHECK: vqabs.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
+;CHECK: vqabss16:
+;CHECK: vqabs.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
+;CHECK: vqabss32:
+;CHECK: vqabs.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqabsQs8:
+;CHECK: vqabs.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqabsQs16:
+;CHECK: vqabs.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqabsQs32:
+;CHECK: vqabs.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index b2b0e23..9fa5307 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vadd\\.i8} %t | count 2
-; RUN: grep {vadd\\.i16} %t | count 2
-; RUN: grep {vadd\\.i32} %t | count 2
-; RUN: grep {vadd\\.i64} %t | count 2
-; RUN: grep {vadd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddi8:
+;CHECK: vadd.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = add <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddi16:
+;CHECK: vadd.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = add <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddi32:
+;CHECK: vadd.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = add <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vaddi64:
+;CHECK: vadd.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = add <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vaddf32:
+;CHECK: vadd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = add <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vaddQi8:
+;CHECK: vadd.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = add <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddQi16:
+;CHECK: vadd.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = add <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddQi32:
+;CHECK: vadd.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = add <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddQi64:
+;CHECK: vadd.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = add <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vaddQf32:
+;CHECK: vadd.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = add <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
+
+define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddhni16:
+;CHECK: vaddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddhni32:
+;CHECK: vaddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddhni64:
+;CHECK: vaddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddls8:
+;CHECK: vaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddls16:
+;CHECK: vaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddls32:
+;CHECK: vaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddlu8:
+;CHECK: vaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddlu16:
+;CHECK: vaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddlu32:
+;CHECK: vaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddws8:
+;CHECK: vaddw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddws16:
+;CHECK: vaddw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddws32:
+;CHECK: vaddw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddwu8:
+;CHECK: vaddw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddwu16:
+;CHECK: vaddw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddwu32:
+;CHECK: vaddw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 4bf79c0..5f3536c 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
@str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00" ; <[43 x i8]*> [#uses=1]
define i32 @main() {
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index 1f2f05b..e4ef9e3 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {add sp, sp, #16} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
-; RUN: grep {add sp, sp, #12} | count 2
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
define i32 @f(i32 %a, ...) {
entry:
@@ -18,4 +16,8 @@ entry:
return: ; preds = %entry
%retval2 = load i32* %retval ; <i32> [#uses=1]
ret i32 %retval2
+; EABI: add sp, sp, #12
+; EABI: add sp, sp, #16
+; OABI: add sp, sp, #12
+; OABI: add sp, sp, #12
}
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
new file mode 100644
index 0000000..e1d23a1
--- /dev/null
+++ b/test/CodeGen/ARM/vbits.ll
@@ -0,0 +1,507 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_andi8:
+;CHECK: vand
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_andi16:
+;CHECK: vand
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_andi32:
+;CHECK: vand
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_andi64:
+;CHECK: vand
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = and <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_andQi8:
+;CHECK: vand
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_andQi16:
+;CHECK: vand
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_andQi32:
+;CHECK: vand
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_andQi64:
+;CHECK: vand
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = and <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_bici8:
+;CHECK: vbic
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_bici16:
+;CHECK: vbic
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_bici32:
+;CHECK: vbic
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = and <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_bici64:
+;CHECK: vbic
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = and <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_bicQi8:
+;CHECK: vbic
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_bicQi16:
+;CHECK: vbic
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_bicQi32:
+;CHECK: vbic
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = and <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_bicQi64:
+;CHECK: vbic
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = and <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
+;CHECK: v_mvni8:
+;CHECK: vmvn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
+;CHECK: v_mvni16:
+;CHECK: vmvn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
+;CHECK: v_mvni32:
+;CHECK: vmvn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
+;CHECK: v_mvni64:
+;CHECK: vmvn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
+;CHECK: v_mvnQi8:
+;CHECK: vmvn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
+;CHECK: v_mvnQi16:
+;CHECK: vmvn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
+;CHECK: v_mvnQi32:
+;CHECK: vmvn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
+;CHECK: v_mvnQi64:
+;CHECK: vmvn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orri8:
+;CHECK: vorr
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = or <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orri16:
+;CHECK: vorr
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = or <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orri32:
+;CHECK: vorr
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = or <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orri64:
+;CHECK: vorr
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = or <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_orrQi8:
+;CHECK: vorr
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = or <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_orrQi16:
+;CHECK: vorr
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = or <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_orrQi32:
+;CHECK: vorr
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = or <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_orrQi64:
+;CHECK: vorr
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = or <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orni8:
+;CHECK: vorn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orni16:
+;CHECK: vorn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orni32:
+;CHECK: vorn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = or <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orni64:
+;CHECK: vorn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = or <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_ornQi8:
+;CHECK: vorn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_ornQi16:
+;CHECK: vorn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_ornQi32:
+;CHECK: vorn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = or <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_ornQi64:
+;CHECK: vorn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = or <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtsti8:
+;CHECK: vtst.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtsti16:
+;CHECK: vtst.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtsti32:
+;CHECK: vtst.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtstQi8:
+;CHECK: vtst.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtstQi16:
+;CHECK: vtst.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtstQi32:
+;CHECK: vtst.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
+ ret <4 x i32> %tmp5
+}
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 37ddf4d..9f3bb4e 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vbsl %t | count 8
-; Note: function names do not include "vbsl" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: v_bsli8:
+;CHECK: vbsl
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -14,6 +14,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
}
define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: v_bsli16:
+;CHECK: vbsl
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -25,6 +27,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: v_bsli32:
+;CHECK: vbsl
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -36,6 +40,8 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
+;CHECK: v_bsli64:
+;CHECK: vbsl
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = load <1 x i64>* %C
@@ -47,6 +53,8 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
}
define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: v_bslQi8:
+;CHECK: vbsl
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -58,6 +66,8 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
}
define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: v_bslQi16:
+;CHECK: vbsl
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -69,6 +79,8 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
}
define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: v_bslQi32:
+;CHECK: vbsl
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -80,6 +92,8 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
}
define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
+;CHECK: v_bslQi64:
+;CHECK: vbsl
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = load <2 x i64>* %C
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index 77f1890..e478751 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -1,61 +1,81 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep {vceq\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vceqi8:
+;CHECK: vceq.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp eq <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vceqi16:
+;CHECK: vceq.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp eq <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vceqi32:
+;CHECK: vceq.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp eq <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vceqf32:
+;CHECK: vceq.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp oeq <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vceqQi8:
+;CHECK: vceq.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp eq <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vceqQi16:
+;CHECK: vceq.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp eq <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vceqQi32:
+;CHECK: vceq.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp eq <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vceqQf32:
+;CHECK: vceq.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp oeq <4 x float> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 14c623e..2c16111 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcge\\.s8} %t | count 2
-; RUN: grep {vcge\\.s16} %t | count 2
-; RUN: grep {vcge\\.s32} %t | count 2
-; RUN: grep {vcge\\.u8} %t | count 2
-; RUN: grep {vcge\\.u16} %t | count 2
-; RUN: grep {vcge\\.u32} %t | count 2
-; RUN: grep {vcge\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcges8:
+;CHECK: vcge.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp sge <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcges16:
+;CHECK: vcge.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sge <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcges32:
+;CHECK: vcge.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp sge <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgeu8:
+;CHECK: vcge.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp uge <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgeu16:
+;CHECK: vcge.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp uge <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgeu32:
+;CHECK: vcge.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp uge <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgef32:
+;CHECK: vcge.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp oge <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQs8:
+;CHECK: vcge.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp sge <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQs16:
+;CHECK: vcge.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp sge <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQs32:
+;CHECK: vcge.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp sge <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQu8:
+;CHECK: vcge.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp uge <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQu16:
+;CHECK: vcge.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp uge <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQu32:
+;CHECK: vcge.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp uge <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgeQf32:
+;CHECK: vcge.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp oge <4 x float> %tmp1, %tmp2
+ %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgef32:
+;CHECK: vacge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgeQf32:
+;CHECK: vacge.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 3f7e550..6b11ba5 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcgt\\.s8} %t | count 2
-; RUN: grep {vcgt\\.s16} %t | count 2
-; RUN: grep {vcgt\\.s32} %t | count 2
-; RUN: grep {vcgt\\.u8} %t | count 2
-; RUN: grep {vcgt\\.u16} %t | count 2
-; RUN: grep {vcgt\\.u32} %t | count 2
-; RUN: grep {vcgt\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgts8:
+;CHECK: vcgt.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp sgt <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgts16:
+;CHECK: vcgt.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sgt <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgts32:
+;CHECK: vcgt.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp sgt <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgtu8:
+;CHECK: vcgt.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp ugt <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgtu16:
+;CHECK: vcgt.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ugt <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgtu32:
+;CHECK: vcgt.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp ugt <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgtf32:
+;CHECK: vcgt.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ogt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQs8:
+;CHECK: vcgt.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp sgt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQs16:
+;CHECK: vcgt.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp sgt <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQs32:
+;CHECK: vcgt.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp sgt <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQu8:
+;CHECK: vcgt.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp ugt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQu16:
+;CHECK: vcgt.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp ugt <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQu32:
+;CHECK: vcgt.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ugt <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgtQf32:
+;CHECK: vcgt.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgtf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgtQf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 9817168..450f90d 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,13 +1,16 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcnt\\.8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
@@ -15,3 +18,115 @@ define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
new file mode 100644
index 0000000..e673305
--- /dev/null
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %tmp3
+}
+
+define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %tmp3
+}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 1cb42bf..f4cc536 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,53 +1,140 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcvt\\.s32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.u32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.s32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tos32:
+;CHECK: vcvt.s32.f32
%tmp1 = load <2 x float>* %A
%tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tou32:
+;CHECK: vcvt.u32.f32
%tmp1 = load <2 x float>* %A
%tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_s32tof32:
+;CHECK: vcvt.f32.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
ret <2 x float> %tmp2
}
define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_u32tof32:
+;CHECK: vcvt.f32.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
ret <2 x float> %tmp2
}
define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tos32:
+;CHECK: vcvt.s32.f32
%tmp1 = load <4 x float>* %A
%tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tou32:
+;CHECK: vcvt.u32.f32
%tmp1 = load <4 x float>* %A
%tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_s32tof32:
+;CHECK: vcvt.f32.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_u32tof32:
+;CHECK: vcvt.f32.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
+
+define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 1c0887a..c9a68ca 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -1,9 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vdup.8 %t | count 4
-; RUN: grep vdup.16 %t | count 4
-; RUN: grep vdup.32 %t | count 8
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_dup8(i8 %A) nounwind {
+;CHECK: v_dup8:
+;CHECK: vdup.8
%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
@@ -16,6 +15,8 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
}
define <4 x i16> @v_dup16(i16 %A) nounwind {
+;CHECK: v_dup16:
+;CHECK: vdup.16
%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
@@ -24,18 +25,24 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
}
define <2 x i32> @v_dup32(i32 %A) nounwind {
+;CHECK: v_dup32:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
ret <2 x i32> %tmp2
}
define <2 x float> @v_dupfloat(float %A) nounwind {
+;CHECK: v_dupfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
ret <2 x float> %tmp2
}
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+;CHECK: v_dupQ8:
+;CHECK: vdup.8
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
@@ -56,6 +63,8 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
}
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+;CHECK: v_dupQ16:
+;CHECK: vdup.16
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
@@ -68,6 +77,8 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
}
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+;CHECK: v_dupQ32:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
@@ -76,6 +87,8 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
}
define <4 x float> @v_dupQfloat(float %A) nounwind {
+;CHECK: v_dupQfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
@@ -86,49 +99,171 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
; Check to make sure it works with shuffles, too.
define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
+;CHECK: v_shuffledup8:
+;CHECK: vdup.8
%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %tmp2
}
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
+;CHECK: v_shuffledup16:
+;CHECK: vdup.16
%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %tmp2
}
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
+;CHECK: v_shuffledup32:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
ret <2 x i32> %tmp2
}
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
+;CHECK: v_shuffledupfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <2 x float> undef, float %A, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
ret <2 x float> %tmp2
}
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
+;CHECK: v_shuffledupQ8:
+;CHECK: vdup.8
%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %tmp2
}
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
+;CHECK: v_shuffledupQ16:
+;CHECK: vdup.16
%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %tmp2
}
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
+;CHECK: v_shuffledupQ32:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %tmp2
}
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
+;CHECK: v_shuffledupQfloat:
+;CHECK: vdup.32
%tmp1 = insertelement <4 x float> undef, float %A, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %tmp2
}
+
+define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupQfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK: vduplane8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK: vduplane16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK: vduplane32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK: vduplanefloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK: vduplaneQ8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK: vduplaneQ16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK: vduplaneQ32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK: vduplaneQfloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %0
+}
+
+define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %0
+}
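; Note on the vdup tests above: a splat can be written either as a chain of
; insertelement instructions or, as the shuffle-based tests check, as a single
; insertelement followed by a shufflevector with an all-zero mask; both forms are
; expected to select vdup. A minimal sketch in the same old-style IR used by these
; tests (value names here are hypothetical, not from the patch):
	%tmp1 = insertelement <4 x i32> undef, i32 %X, i32 0
	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer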
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
new file mode 100644
index 0000000..20d953b
--- /dev/null
+++ b/test/CodeGen/ARM/vext.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextRd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: test_vextd16:
+;CHECK: vext
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i16> %tmp3
+}
+
+define arm_apcscc <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: test_vextq32:
+;CHECK: vext
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 58c2068..6946d02 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -1,96 +1,139 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.f32} %t | count 1
-; RUN: grep {vcgt\\.f32} %t | count 9
-; RUN: grep {vcge\\.f32} %t | count 5
-; RUN: grep vorr %t | count 4
-; RUN: grep vmvn %t | count 7
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; This tests vfcmp operations that do not map directly to NEON instructions.
+; This tests fcmp operations that do not map directly to NEON instructions.
; une is implemented with VCEQ/VMVN
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp une <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; olt is implemented with VCGT
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp olt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ole is implemented with VCGE
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ole <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; uge is implemented with VCGT/VMVN
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp uge <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ule is implemented with VCGT/VMVN
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ule <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ugt is implemented with VCGE/VMVN
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ugt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ult is implemented with VCGE/VMVN
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ult <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ueq is implemented with VCGT/VCGT/VORR/VMVN
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ueq <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; one is implemented with VCGT/VCGT/VORR
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp one <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; uno is implemented with VCGT/VCGE/VORR/VMVN
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp uno <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
; ord is implemented with VCGT/VCGE/VORR
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ord <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
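; Note on the updated vfcmp tests above: each comparison is now written as a plain
; fcmp yielding an <N x i1> mask, followed by a sext back to the element width so the
; result keeps the NEON all-ones/all-zeros lane convention; the CHECK lines then match
; the expected VCEQ/VCGT/VCGE (plus VMVN/VORR) sequences. A minimal sketch in the same
; old-style IR (hypothetical names):
	%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>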
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index f58da44..50000e31 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,19 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fabs | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fmscs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcvt | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fuito | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fto.i | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep bmi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcmpezs | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define void @test(float* %P, double* %D) {
%A = load float* %P ; <float> [#uses=1]
@@ -28,16 +13,20 @@ declare float @fabsf(float)
declare double @fabs(double)
define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: fabss
%b = call float @fabsf( float %a ) ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=1]
+;CHECK: fabsd
%B = call double @fabs( double %A ) ; <double> [#uses=1]
store double %B, double* %D
ret void
}
define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
%a = load float* %P ; <float> [#uses=2]
%b = fadd float %a, %a ; <float> [#uses=1]
store float %b, float* %P
@@ -48,9 +37,12 @@ define void @test_add(float* %P, double* %D) {
}
define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: fcvtds
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
+;CHECK: fcvtsd
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
@@ -58,9 +50,11 @@ define void @test_ext_round(float* %P, double* %D) {
}
define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
%a1 = load float* %P1 ; <float> [#uses=1]
%a2 = load float* %P2 ; <float> [#uses=1]
%a3 = load float* %P3 ; <float> [#uses=1]
+;CHECK: fmscs
%X = fmul float %a1, %a2 ; <float> [#uses=1]
%Y = fsub float %X, %a3 ; <float> [#uses=1]
store float %Y, float* %P1
@@ -68,42 +62,55 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
}
define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
%a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: ftosizs
%b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
%a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: ftouizs
%b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
%a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: ftosizd
%b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
%a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: ftouizd
%b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: fuitod
%b1 = uitofp i32 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: fuitod
%b1 = uitofp i8 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
entry:
%tmp = load float* %glob ; <float> [#uses=2]
%tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
@@ -111,6 +118,8 @@ entry:
%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
br i1 %tmp6, label %cond_true, label %cond_false
cond_true: ; preds = %entry
@@ -129,8 +138,10 @@ declare i32 @bar(...)
declare i32 @baz(...)
define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
entry:
%tmp = load float* %glob ; <float> [#uses=1]
+;CHECK: fcmpezs
%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index a361ba2..f0df798 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -1,11 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.s8} %t | count 2
-; RUN: grep {vmov\\.s16} %t | count 2
-; RUN: grep {vmov\\.u8} %t | count 2
-; RUN: grep {vmov\\.u16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
+; RUN: llc < %s -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@@ -13,6 +12,8 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
}
define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@@ -20,6 +21,8 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
}
define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@@ -27,6 +30,8 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
}
define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@@ -35,6 +40,8 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
%tmp1 = load <2 x i32>* %A
%tmp2 = add <2 x i32> %tmp1, %tmp1
%tmp3 = extractelement <2 x i32> %tmp2, i32 1
@@ -42,6 +49,8 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
}
define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@@ -49,6 +58,8 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@@ -56,6 +67,8 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
}
define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@@ -63,6 +76,8 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@@ -71,8 +86,127 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
%tmp1 = load <4 x i32>* %A
%tmp2 = add <4 x i32> %tmp1, %tmp1
%tmp3 = extractelement <4 x i32> %tmp2, i32 1
ret i32 %tmp3
}
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
+ %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+ %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+ ret <4 x i32> %tmp2
+}
+
+define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: fcpys
+;CHECK: fcpys
+entry:
+ %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
+ ret <2 x float> %0
+}
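; Note on the lane tests above: as the comments in vget_lane.ll say, a vector add is
; inserted before the extractelement so the value is forced into a NEON register
; rather than being folded into a scalar load; otherwise there would be no vmov to
; match. A minimal sketch in the same old-style IR (hypothetical names):
	%tmp1 = load <2 x i32>* %A
	%tmp2 = add <2 x i32> %tmp1, %tmp1
	%tmp3 = extractelement <2 x i32> %tmp2, i32 1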
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 5e7503d..379e062 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhadd\\.s8} %t | count 2
-; RUN: grep {vhadd\\.s16} %t | count 2
-; RUN: grep {vhadd\\.s32} %t | count 2
-; RUN: grep {vhadd\\.u8} %t | count 2
-; RUN: grep {vhadd\\.u16} %t | count 2
-; RUN: grep {vhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -105,3 +123,127 @@ declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind rea
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQs32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index 32a66e5..0f0d027 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhsub\\.s8} %t | count 2
-; RUN: grep {vhsub\\.s16} %t | count 2
-; RUN: grep {vhsub\\.s32} %t | count 2
-; RUN: grep {vhsub\\.u8} %t | count 2
-; RUN: grep {vhsub\\.u16} %t | count 2
-; RUN: grep {vhsub\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubs8:
+;CHECK: vhsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubs16:
+;CHECK: vhsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubs32:
+;CHECK: vhsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubu8:
+;CHECK: vhsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubu16:
+;CHECK: vhsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubu32:
+;CHECK: vhsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQs8:
+;CHECK: vhsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQs16:
+;CHECK: vhsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQs32:
+;CHECK: vhsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQu8:
+;CHECK: vhsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQu16:
+;CHECK: vhsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQu32:
+;CHECK: vhsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
index 86858f9..2d8cb89 100644
--- a/test/CodeGen/ARM/vicmp.ll
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -1,85 +1,113 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep vmvn %t | count 6
-; RUN: grep {vcgt\\.s8} %t | count 1
-; RUN: grep {vcge\\.s16} %t | count 1
-; RUN: grep {vcgt\\.u16} %t | count 1
-; RUN: grep {vcge\\.u32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; This tests vicmp operations that do not map directly to NEON instructions.
+; This tests icmp operations that do not map directly to NEON instructions.
; Not-equal (ne) operations are implemented by VCEQ/VMVN. Less-than (lt/ult)
; and less-than-or-equal (le/ule) are implemented by swapping the arguments
; to VCGT and VCGE. Test all the operand types for not-equal but only sample
; the other operations.
define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcnei8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp ne <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
}
define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcnei16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ne <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcnei32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp ne <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcneQi8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp ne <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcneQi16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp ne <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcneQi32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ne <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcltQs8:
+;CHECK: vcgt.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp slt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
}
define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcles16:
+;CHECK: vcge.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sle <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcltu16:
+;CHECK: vcgt.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ult <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
}
define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcleQu32:
+;CHECK: vcge.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ule <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
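; Note on the updated vicmp tests above: as the file's comment explains, ne is lowered
; to VCEQ/VMVN and the less-than forms select VCGT/VCGE with the operands swapped; in
; the IR each case is an icmp producing an <N x i1> mask that is sign-extended back to
; the element type. A minimal sketch in the same old-style IR (hypothetical names):
	%tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>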
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
new file mode 100644
index 0000000..f5383aa
--- /dev/null
+++ b/test/CodeGen/ARM/vld1.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vld1i8(i8* %A) nounwind {
+;CHECK: vld1i8:
+;CHECK: vld1.8
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
+ ret <8 x i8> %tmp1
+}
+
+define <4 x i16> @vld1i16(i16* %A) nounwind {
+;CHECK: vld1i16:
+;CHECK: vld1.16
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
+ ret <4 x i16> %tmp1
+}
+
+define <2 x i32> @vld1i32(i32* %A) nounwind {
+;CHECK: vld1i32:
+;CHECK: vld1.32
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
+ ret <2 x i32> %tmp1
+}
+
+define <2 x float> @vld1f(float* %A) nounwind {
+;CHECK: vld1f:
+;CHECK: vld1.32
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
+ ret <2 x float> %tmp1
+}
+
+define <1 x i64> @vld1i64(i64* %A) nounwind {
+;CHECK: vld1i64:
+;CHECK: vld1.64
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
+ ret <1 x i64> %tmp1
+}
+
+define <16 x i8> @vld1Qi8(i8* %A) nounwind {
+;CHECK: vld1Qi8:
+;CHECK: vld1.8
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @vld1Qi16(i16* %A) nounwind {
+;CHECK: vld1Qi16:
+;CHECK: vld1.16
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
+ ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @vld1Qi32(i32* %A) nounwind {
+;CHECK: vld1Qi32:
+;CHECK: vld1.32
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
+ ret <4 x i32> %tmp1
+}
+
+define <4 x float> @vld1Qf(float* %A) nounwind {
+;CHECK: vld1Qf:
+;CHECK: vld1.32
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
+ ret <4 x float> %tmp1
+}
+
+define <2 x i64> @vld1Qi64(i64* %A) nounwind {
+;CHECK: vld1Qi64:
+;CHECK: vld1.64
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
+ ret <2 x i64> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
new file mode 100644
index 0000000..23f7d2c
--- /dev/null
+++ b/test/CodeGen/ARM/vld2.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2i8(i8* %A) nounwind {
+;CHECK: vld2i8:
+;CHECK: vld2.8
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld2i16(i16* %A) nounwind {
+;CHECK: vld2i16:
+;CHECK: vld2.16
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld2i32(i32* %A) nounwind {
+;CHECK: vld2i32:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld2f(float* %A) nounwind {
+;CHECK: vld2f:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld2i64(i64* %A) nounwind {
+;CHECK: vld2i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld2Qi8(i8* %A) nounwind {
+;CHECK: vld2Qi8:
+;CHECK: vld2.8
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld2Qi16(i16* %A) nounwind {
+;CHECK: vld2Qi16:
+;CHECK: vld2.16
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld2Qi32(i32* %A) nounwind {
+;CHECK: vld2Qi32:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld2Qf(float* %A) nounwind {
+;CHECK: vld2Qf:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
new file mode 100644
index 0000000..207dc6a
--- /dev/null
+++ b/test/CodeGen/ARM/vld3.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3i8(i8* %A) nounwind {
+;CHECK: vld3i8:
+;CHECK: vld3.8
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld3i16(i16* %A) nounwind {
+;CHECK: vld3i16:
+;CHECK: vld3.16
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld3i32(i32* %A) nounwind {
+;CHECK: vld3i32:
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld3f(float* %A) nounwind {
+;CHECK: vld3f:
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld3i64(i64* %A) nounwind {
+;CHECK: vld3i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld3Qi8(i8* %A) nounwind {
+;CHECK: vld3Qi8:
+;CHECK: vld3.8
+;CHECK: vld3.8
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld3Qi16(i16* %A) nounwind {
+;CHECK: vld3Qi16:
+;CHECK: vld3.16
+;CHECK: vld3.16
+ %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld3Qi32(i32* %A) nounwind {
+;CHECK: vld3Qi32:
+;CHECK: vld3.32
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld3Qf(float* %A) nounwind {
+;CHECK: vld3Qf:
+;CHECK: vld3.32
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
new file mode 100644
index 0000000..0624f29
--- /dev/null
+++ b/test/CodeGen/ARM/vld4.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4i8(i8* %A) nounwind {
+;CHECK: vld4i8:
+;CHECK: vld4.8
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld4i16(i16* %A) nounwind {
+;CHECK: vld4i16:
+;CHECK: vld4.16
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld4i32(i32* %A) nounwind {
+;CHECK: vld4i32:
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld4f(float* %A) nounwind {
+;CHECK: vld4f:
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld4i64(i64* %A) nounwind {
+;CHECK: vld4i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld4Qi8(i8* %A) nounwind {
+;CHECK: vld4Qi8:
+;CHECK: vld4.8
+;CHECK: vld4.8
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld4Qi16(i16* %A) nounwind {
+;CHECK: vld4Qi16:
+;CHECK: vld4.16
+;CHECK: vld4.16
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld4Qi32(i32* %A) nounwind {
+;CHECK: vld4Qi32:
+;CHECK: vld4.32
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld4Qf(float* %A) nounwind {
+;CHECK: vld4Qf:
+;CHECK: vld4.32
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
new file mode 100644
index 0000000..53881a3f
--- /dev/null
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld2lanei8:
+;CHECK: vld2.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld2lanei16:
+;CHECK: vld2.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32:
+;CHECK: vld2.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld2lanef:
+;CHECK: vld2.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld2laneQi16:
+;CHECK: vld2.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld2laneQi32:
+;CHECK: vld2.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld2laneQf:
+;CHECK: vld2.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld3lanei8:
+;CHECK: vld3.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i8> %tmp3, %tmp4
+ %tmp7 = add <8 x i8> %tmp5, %tmp6
+ ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld3lanei16:
+;CHECK: vld3.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i16> %tmp3, %tmp4
+ %tmp7 = add <4 x i16> %tmp5, %tmp6
+ ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld3lanei32:
+;CHECK: vld3.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x i32> %tmp3, %tmp4
+ %tmp7 = add <2 x i32> %tmp5, %tmp6
+ ret <2 x i32> %tmp7
+}
+
+define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld3lanef:
+;CHECK: vld3.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x float> %tmp3, %tmp4
+ %tmp7 = add <2 x float> %tmp5, %tmp6
+ ret <2 x float> %tmp7
+}
+
+define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld3laneQi16:
+;CHECK: vld3.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i16> %tmp3, %tmp4
+ %tmp7 = add <8 x i16> %tmp5, %tmp6
+ ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld3laneQi32:
+;CHECK: vld3.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i32> %tmp3, %tmp4
+ %tmp7 = add <4 x i32> %tmp5, %tmp6
+ ret <4 x i32> %tmp7
+}
+
+define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld3laneQf:
+;CHECK: vld3.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x float> %tmp3, %tmp4
+ %tmp7 = add <4 x float> %tmp5, %tmp6
+ ret <4 x float> %tmp7
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8:
+;CHECK: vld4.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i8> %tmp3, %tmp4
+ %tmp8 = add <8 x i8> %tmp5, %tmp6
+ %tmp9 = add <8 x i8> %tmp7, %tmp8
+ ret <8 x i8> %tmp9
+}
+
+define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld4lanei16:
+;CHECK: vld4.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i16> %tmp3, %tmp4
+ %tmp8 = add <4 x i16> %tmp5, %tmp6
+ %tmp9 = add <4 x i16> %tmp7, %tmp8
+ ret <4 x i16> %tmp9
+}
+
+define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld4lanei32:
+;CHECK: vld4.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x i32> %tmp3, %tmp4
+ %tmp8 = add <2 x i32> %tmp5, %tmp6
+ %tmp9 = add <2 x i32> %tmp7, %tmp8
+ ret <2 x i32> %tmp9
+}
+
+define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld4lanef:
+;CHECK: vld4.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x float> %tmp3, %tmp4
+ %tmp8 = add <2 x float> %tmp5, %tmp6
+ %tmp9 = add <2 x float> %tmp7, %tmp8
+ ret <2 x float> %tmp9
+}
+
+define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld4laneQi16:
+;CHECK: vld4.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i16> %tmp3, %tmp4
+ %tmp8 = add <8 x i16> %tmp5, %tmp6
+ %tmp9 = add <8 x i16> %tmp7, %tmp8
+ ret <8 x i16> %tmp9
+}
+
+define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld4laneQi32:
+;CHECK: vld4.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i32> %tmp3, %tmp4
+ %tmp8 = add <4 x i32> %tmp5, %tmp6
+ %tmp9 = add <4 x i32> %tmp7, %tmp8
+ ret <4 x i32> %tmp9
+}
+
+define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld4laneQf:
+;CHECK: vld4.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x float> %tmp3, %tmp4
+ %tmp8 = add <4 x float> %tmp5, %tmp6
+ %tmp9 = add <4 x float> %tmp7, %tmp8
+ ret <4 x float> %tmp9
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
new file mode 100644
index 0000000..e3527c1
--- /dev/null
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -0,0 +1,293 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmins8:
+;CHECK: vmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmins16:
+;CHECK: vmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmins32:
+;CHECK: vmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vminu8:
+;CHECK: vmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vminu16:
+;CHECK: vmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vminu32:
+;CHECK: vmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vminf32:
+;CHECK: vmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQs8:
+;CHECK: vmin.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQs16:
+;CHECK: vmin.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQs32:
+;CHECK: vmin.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQu8:
+;CHECK: vmin.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQu16:
+;CHECK: vmin.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQu32:
+;CHECK: vmin.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vminQf32:
+;CHECK: vmin.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxs8:
+;CHECK: vmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxs16:
+;CHECK: vmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxs32:
+;CHECK: vmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxu8:
+;CHECK: vmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxu16:
+;CHECK: vmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxu32:
+;CHECK: vmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmaxf32:
+;CHECK: vmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQs8:
+;CHECK: vmax.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQs16:
+;CHECK: vmax.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQs32:
+;CHECK: vmax.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQu8:
+;CHECK: vmax.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQu16:
+;CHECK: vmax.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQu32:
+;CHECK: vmax.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmaxQf32:
+;CHECK: vmax.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index ed77e11..8405218 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmla\\.i8} %t | count 2
-; RUN: grep {vmla\\.i16} %t | count 2
-; RUN: grep {vmla\\.i32} %t | count 2
-; RUN: grep {vmla\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlai8:
+;CHECK: vmla.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
}
define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlai16:
+;CHECK: vmla.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlai32:
+;CHECK: vmla.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlaf32:
+;CHECK: vmla.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
}
define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlaQi8:
+;CHECK: vmla.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
}
define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlaQi16:
+;CHECK: vmla.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlaQi32:
+;CHECK: vmla.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
}
define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlaQf32:
+;CHECK: vmla.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
%tmp5 = add <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
+
+define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlals8:
+;CHECK: vmlal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlals16:
+;CHECK: vmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlals32:
+;CHECK: vmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlalu8:
+;CHECK: vmlal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlalu16:
+;CHECK: vmlal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlalu32:
+;CHECK: vmlal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes16
+; CHECK: vmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes32
+; CHECK: vmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu16
+; CHECK: vmlal.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu32
+; CHECK: vmlal.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index d519b7e..c89552e 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmls\\.i8} %t | count 2
-; RUN: grep {vmls\\.i16} %t | count 2
-; RUN: grep {vmls\\.i32} %t | count 2
-; RUN: grep {vmls\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlsi8:
+;CHECK: vmls.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
}
define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsi16:
+;CHECK: vmls.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
}
define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsi32:
+;CHECK: vmls.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
}
define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlsf32:
+;CHECK: vmls.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
}
define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlsQi8:
+;CHECK: vmls.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
}
define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlsQi16:
+;CHECK: vmls.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
}
define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlsQi32:
+;CHECK: vmls.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
}
define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlsQf32:
+;CHECK: vmls.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
%tmp5 = sub <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
+
+define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlsls8:
+;CHECK: vmlsl.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsls16:
+;CHECK: vmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsls32:
+;CHECK: vmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlslu8:
+;CHECK: vmlsl.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlslu16:
+;CHECK: vmlsl.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlslu32:
+;CHECK: vmlsl.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes16
+; CHECK: vmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes32
+; CHECK: vmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu16
+; CHECK: vmlsl.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu32
+; CHECK: vmlsl.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index af9c8e2..ed69f97 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -1,101 +1,303 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vmov.i8 %t | count 2
-; RUN: grep vmov.i16 %t | count 4
-; RUN: grep vmov.i32 %t | count 12
-; RUN: grep vmov.i64 %t | count 2
-; Note: function names do not include "vmov" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_movi8() nounwind {
+;CHECK: v_movi8:
+;CHECK: vmov.i8
ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <4 x i16> @v_movi16a() nounwind {
+;CHECK: v_movi16a:
+;CHECK: vmov.i16
ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
}
; 0x1000 = 4096
define <4 x i16> @v_movi16b() nounwind {
+;CHECK: v_movi16b:
+;CHECK: vmov.i16
ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
}
define <2 x i32> @v_movi32a() nounwind {
+;CHECK: v_movi32a:
+;CHECK: vmov.i32
ret <2 x i32> < i32 32, i32 32 >
}
; 0x2000 = 8192
define <2 x i32> @v_movi32b() nounwind {
+;CHECK: v_movi32b:
+;CHECK: vmov.i32
ret <2 x i32> < i32 8192, i32 8192 >
}
; 0x200000 = 2097152
define <2 x i32> @v_movi32c() nounwind {
+;CHECK: v_movi32c:
+;CHECK: vmov.i32
ret <2 x i32> < i32 2097152, i32 2097152 >
}
; 0x20000000 = 536870912
define <2 x i32> @v_movi32d() nounwind {
+;CHECK: v_movi32d:
+;CHECK: vmov.i32
ret <2 x i32> < i32 536870912, i32 536870912 >
}
; 0x20ff = 8447
define <2 x i32> @v_movi32e() nounwind {
+;CHECK: v_movi32e:
+;CHECK: vmov.i32
ret <2 x i32> < i32 8447, i32 8447 >
}
; 0x20ffff = 2162687
define <2 x i32> @v_movi32f() nounwind {
+;CHECK: v_movi32f:
+;CHECK: vmov.i32
ret <2 x i32> < i32 2162687, i32 2162687 >
}
; 0xff0000ff0000ffff = 18374687574888349695
define <1 x i64> @v_movi64() nounwind {
+;CHECK: v_movi64:
+;CHECK: vmov.i64
ret <1 x i64> < i64 18374687574888349695 >
}
define <16 x i8> @v_movQi8() nounwind {
+;CHECK: v_movQi8:
+;CHECK: vmov.i8
ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <8 x i16> @v_movQi16a() nounwind {
+;CHECK: v_movQi16a:
+;CHECK: vmov.i16
ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
}
; 0x1000 = 4096
define <8 x i16> @v_movQi16b() nounwind {
+;CHECK: v_movQi16b:
+;CHECK: vmov.i16
ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
}
define <4 x i32> @v_movQi32a() nounwind {
+;CHECK: v_movQi32a:
+;CHECK: vmov.i32
ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
}
; 0x2000 = 8192
define <4 x i32> @v_movQi32b() nounwind {
+;CHECK: v_movQi32b:
+;CHECK: vmov.i32
ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
}
; 0x200000 = 2097152
define <4 x i32> @v_movQi32c() nounwind {
+;CHECK: v_movQi32c:
+;CHECK: vmov.i32
ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
}
; 0x20000000 = 536870912
define <4 x i32> @v_movQi32d() nounwind {
+;CHECK: v_movQi32d:
+;CHECK: vmov.i32
ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
}
; 0x20ff = 8447
define <4 x i32> @v_movQi32e() nounwind {
+;CHECK: v_movQi32e:
+;CHECK: vmov.i32
ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}
; 0x20ffff = 2162687
define <4 x i32> @v_movQi32f() nounwind {
+;CHECK: v_movQi32f:
+;CHECK: vmov.i32
ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}
; 0xff0000ff0000ffff = 18374687574888349695
define <2 x i64> @v_movQi64() nounwind {
+;CHECK: v_movQi64:
+;CHECK: vmov.i64
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
+define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
+;CHECK: vmovls8:
+;CHECK: vmovl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
+;CHECK: vmovls16:
+;CHECK: vmovl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
+;CHECK: vmovls32:
+;CHECK: vmovl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
+;CHECK: vmovlu8:
+;CHECK: vmovl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
+;CHECK: vmovlu16:
+;CHECK: vmovl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
+;CHECK: vmovlu32:
+;CHECK: vmovl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
+
+define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
+;CHECK: vmovni16:
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
+;CHECK: vmovni32:
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
+;CHECK: vmovni64:
+;CHECK: vmovn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovns16:
+;CHECK: vqmovn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovns32:
+;CHECK: vqmovn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovns64:
+;CHECK: vqmovn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovnu16:
+;CHECK: vqmovn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovnu32:
+;CHECK: vqmovn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovnu64:
+;CHECK: vqmovn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovuns16:
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovuns32:
+;CHECK: vqmovun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovuns64:
+;CHECK: vqmovun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index eb9ae7b..325da5d 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmul\\.i8} %t | count 2
-; RUN: grep {vmul\\.i16} %t | count 2
-; RUN: grep {vmul\\.i32} %t | count 2
-; RUN: grep {vmul\\.f32} %t | count 2
-; RUN: grep {vmul\\.p8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmuli8:
+;CHECK: vmul.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = mul <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmuli16:
+;CHECK: vmul.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = mul <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmuli32:
+;CHECK: vmul.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = mul <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmulf32:
+;CHECK: vmul.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = mul <2 x float> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulp8:
+;CHECK: vmul.p8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -41,6 +46,8 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQi8:
+;CHECK: vmul.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = mul <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmulQi16:
+;CHECK: vmul.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = mul <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmulQi32:
+;CHECK: vmul.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = mul <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmulQf32:
+;CHECK: vmul.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = mul <4 x float> %tmp1, %tmp2
@@ -69,6 +82,8 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
}
define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQp8:
+;CHECK: vmul.p8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,3 +92,166 @@ define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+
+define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanef32:
+; CHECK: vmul.f32 d0, d0, d1[0]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
+ %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
+ ret <2 x float> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes16:
+; CHECK: vmul.i16 d0, d0, d1[1]
+	%0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes32:
+; CHECK: vmul.i32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanef32:
+; CHECK: vmul.f32 q0, q0, d2[1]
+	%0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes16:
+; CHECK: vmul.i16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes32:
+; CHECK: vmul.i32 q0, q0, d2[1]
+	%0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8:
+;CHECK: vmull.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16:
+;CHECK: vmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32:
+;CHECK: vmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8:
+;CHECK: vmull.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16:
+;CHECK: vmull.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32:
+;CHECK: vmull.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullp8:
+;CHECK: vmull.p8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16
+; CHECK: vmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32
+; CHECK: vmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16
+; CHECK: vmull.u16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32
+; CHECK: vmull.u32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 9fa527f..7764e87 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -1,53 +1,121 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vneg\\.s8} %t | count 2
-; RUN: grep {vneg\\.s16} %t | count 2
-; RUN: grep {vneg\\.s32} %t | count 2
-; RUN: grep {vneg\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vnegs8:
+;CHECK: vneg.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = sub <8 x i8> zeroinitializer, %tmp1
ret <8 x i8> %tmp2
}
define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vnegs16:
+;CHECK: vneg.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = sub <4 x i16> zeroinitializer, %tmp1
ret <4 x i16> %tmp2
}
define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vnegs32:
+;CHECK: vneg.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = sub <2 x i32> zeroinitializer, %tmp1
ret <2 x i32> %tmp2
}
define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
+;CHECK: vnegf32:
+;CHECK: vneg.f32
%tmp1 = load <2 x float>* %A
%tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <2 x float> %tmp2
}
define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vnegQs8:
+;CHECK: vneg.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = sub <16 x i8> zeroinitializer, %tmp1
ret <16 x i8> %tmp2
}
define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vnegQs16:
+;CHECK: vneg.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = sub <8 x i16> zeroinitializer, %tmp1
ret <8 x i16> %tmp2
}
define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vnegQs32:
+;CHECK: vneg.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = sub <4 x i32> zeroinitializer, %tmp1
ret <4 x i32> %tmp2
}
define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
+;CHECK: vnegQf32:
+;CHECK: vneg.f32
%tmp1 = load <4 x float>* %A
%tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <4 x float> %tmp2
}
+
+define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vqnegs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vqnegs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vqnegs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqnegQs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqnegQs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqnegQs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index c41c532..7296e93 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadal\\.s8} %t | count 2
-; RUN: grep {vpadal\\.s16} %t | count 2
-; RUN: grep {vpadal\\.s32} %t | count 2
-; RUN: grep {vpadal\\.u8} %t | count 2
-; RUN: grep {vpadal\\.u16} %t | count 2
-; RUN: grep {vpadal\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadals8:
+;CHECK: vpadal.s8
%tmp1 = load <4 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
}
define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadals16:
+;CHECK: vpadal.s16
%tmp1 = load <2 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
}
define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadals32:
+;CHECK: vpadal.s32
%tmp1 = load <1 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
}
define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadalu8:
+;CHECK: vpadal.u8
%tmp1 = load <4 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
}
define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadalu16:
+;CHECK: vpadal.u16
%tmp1 = load <2 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
}
define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadalu32:
+;CHECK: vpadal.u32
%tmp1 = load <1 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQs8:
+;CHECK: vpadal.s8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
}
define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQs16:
+;CHECK: vpadal.s16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
}
define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQs32:
+;CHECK: vpadal.s32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
}
define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQu8:
+;CHECK: vpadal.u8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
}
define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQu16:
+;CHECK: vpadal.u16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
}
define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQu32:
+;CHECK: vpadal.u32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index baff492..2125573 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,39 +1,155 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadd\\.i8} %t | count 1
-; RUN: grep {vpadd\\.i16} %t | count 1
-; RUN: grep {vpadd\\.i32} %t | count 1
-; RUN: grep {vpadd\\.f32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpaddi8:
+;CHECK: vpadd.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpaddi16:
+;CHECK: vpadd.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpaddi32:
+;CHECK: vpadd.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpaddf32:
+;CHECK: vpadd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
-declare <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddls8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddls16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddls32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddlu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddlu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddlu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQs8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQs16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQs32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
new file mode 100644
index 0000000..b75bcc9
--- /dev/null
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmins8:
+;CHECK: vpmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmins16:
+;CHECK: vpmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmins32:
+;CHECK: vpmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpminu8:
+;CHECK: vpmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpminu16:
+;CHECK: vpmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpminu32:
+;CHECK: vpmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpminf32:
+;CHECK: vpmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxs8:
+;CHECK: vpmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxs16:
+;CHECK: vpmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxs32:
+;CHECK: vpmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxu8:
+;CHECK: vpmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxu16:
+;CHECK: vpmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxu32:
+;CHECK: vpmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpmaxf32:
+;CHECK: vpmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index c9e2359..a1669b6 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqadd\\.s8} %t | count 2
-; RUN: grep {vqadd\\.s16} %t | count 2
-; RUN: grep {vqadd\\.s32} %t | count 2
-; RUN: grep {vqadd\\.s64} %t | count 2
-; RUN: grep {vqadd\\.u8} %t | count 2
-; RUN: grep {vqadd\\.u16} %t | count 2
-; RUN: grep {vqadd\\.u32} %t | count 2
-; RUN: grep {vqadd\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqadds8:
+;CHECK: vqadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqadds16:
+;CHECK: vqadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqadds32:
+;CHECK: vqadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqadds64:
+;CHECK: vqadd.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqaddu8:
+;CHECK: vqadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqaddu16:
+;CHECK: vqadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqaddu32:
+;CHECK: vqadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqaddu64:
+;CHECK: vqadd.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQs8:
+;CHECK: vqadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQs16:
+;CHECK: vqadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQs32:
+;CHECK: vqadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQs64:
+;CHECK: vqadd.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQu8:
+;CHECK: vqadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQu16:
+;CHECK: vqadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQu32:
+;CHECK: vqadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQu64:
+;CHECK: vqadd.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
new file mode 100644
index 0000000..8dcc7f7
--- /dev/null
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -0,0 +1,281 @@
+; RUN: llc -mattr=+neon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulhs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulhs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqdmulhQs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqdmulhQs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes16
+; CHECK: vqdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes32
+; CHECK: vqdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes16
+; CHECK: vqdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes32
+; CHECK: vqdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrdmulhs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrdmulhs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrdmulhQs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrdmulhQs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes16
+; CHECK: vqrdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes32
+; CHECK: vqrdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes16
+; CHECK: vqrdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes32
+; CHECK: vqrdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulls16:
+;CHECK: vqdmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulls32:
+;CHECK: vqdmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes16
+; CHECK: vqdmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes32
+; CHECK: vqdmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlals16:
+;CHECK: vqdmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlals32:
+;CHECK: vqdmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes16
+; CHECK: vqdmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes32
+; CHECK: vqdmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlsls16:
+;CHECK: vqdmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlsls32:
+;CHECK: vqdmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes16
+; CHECK: vqdmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes32
+; CHECK: vqdmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index 60b04bd..e4d29a3 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqshl\\.s8} %t | count 4
-; RUN: grep {vqshl\\.s16} %t | count 4
-; RUN: grep {vqshl\\.s32} %t | count 4
-; RUN: grep {vqshl\\.s64} %t | count 4
-; RUN: grep {vqshl\\.u8} %t | count 4
-; RUN: grep {vqshl\\.u16} %t | count 4
-; RUN: grep {vqshl\\.u32} %t | count 4
-; RUN: grep {vqshl\\.u64} %t | count 4
-; RUN: grep {vqshl\\.s8.*#7} %t | count 2
-; RUN: grep {vqshl\\.s16.*#15} %t | count 2
-; RUN: grep {vqshl\\.s32.*#31} %t | count 2
-; RUN: grep {vqshl\\.s64.*#63} %t | count 2
-; RUN: grep {vqshl\\.u8.*#7} %t | count 2
-; RUN: grep {vqshl\\.u16.*#15} %t | count 2
-; RUN: grep {vqshl\\.u32.*#31} %t | count 2
-; RUN: grep {vqshl\\.u64.*#63} %t | count 2
-; RUN: grep {vqshlu\\.s8} %t | count 2
-; RUN: grep {vqshlu\\.s16} %t | count 2
-; RUN: grep {vqshlu\\.s32} %t | count 2
-; RUN: grep {vqshlu\\.s64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshls8:
+;CHECK: vqshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -28,6 +10,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshls16:
+;CHECK: vqshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -35,6 +19,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshls32:
+;CHECK: vqshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -42,6 +28,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshls64:
+;CHECK: vqshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -49,6 +37,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshlu8:
+;CHECK: vqshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -56,6 +46,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshlu16:
+;CHECK: vqshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -63,6 +55,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshlu32:
+;CHECK: vqshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,6 +64,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshlu64:
+;CHECK: vqshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -77,6 +73,8 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQs8:
+;CHECK: vqshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -84,6 +82,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQs16:
+;CHECK: vqshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,6 +91,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQs32:
+;CHECK: vqshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -98,6 +100,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQs64:
+;CHECK: vqshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -105,6 +109,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQu8:
+;CHECK: vqshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -112,6 +118,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQu16:
+;CHECK: vqshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -119,6 +127,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQu32:
+;CHECK: vqshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -126,6 +136,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQu64:
+;CHECK: vqshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -133,144 +145,192 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshls_n8:
+;CHECK: vqshl.s8{{.*#7}}
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshls_n16:
+;CHECK: vqshl.s16{{.*#15}}
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshls_n32:
+;CHECK: vqshl.s32{{.*#31}}
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshls_n64:
+;CHECK: vqshl.s64{{.*#63}}
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlu_n8:
+;CHECK: vqshl.u8{{.*#7}}
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlu_n16:
+;CHECK: vqshl.u16{{.*#15}}
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlu_n32:
+;CHECK: vqshl.u32{{.*#31}}
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlu_n64:
+;CHECK: vqshl.u64{{.*#63}}
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlsu_n8:
+;CHECK: vqshlu.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlsu_n16:
+;CHECK: vqshlu.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlsu_n32:
+;CHECK: vqshlu.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlsu_n64:
+;CHECK: vqshlu.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQs_n8:
+;CHECK: vqshl.s8{{.*#7}}
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQs_n16:
+;CHECK: vqshl.s16{{.*#15}}
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQs_n32:
+;CHECK: vqshl.s32{{.*#31}}
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQs_n64:
+;CHECK: vqshl.s64{{.*#63}}
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQu_n8:
+;CHECK: vqshl.u8{{.*#7}}
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQu_n16:
+;CHECK: vqshl.u16{{.*#15}}
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQu_n32:
+;CHECK: vqshl.u32{{.*#31}}
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQu_n64:
+;CHECK: vqshl.u64{{.*#63}}
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQsu_n8:
+;CHECK: vqshlu.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQsu_n16:
+;CHECK: vqshlu.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQsu_n32:
+;CHECK: vqshlu.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQsu_n64:
+;CHECK: vqshlu.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
@@ -305,3 +365,167 @@ declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
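+; Saturating rounding shift left by a vector shift amount (vqrshl), signed and
+; unsigned, for both 64-bit (D) and 128-bit (Q) registers.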
+define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshls8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshls16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshls32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshls64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshlu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshlu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshlu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshlu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQs8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQs16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQs32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQs64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
index 6bd607a..5da7943 100644
--- a/test/CodeGen/ARM/vqshrn.ll
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -1,63 +1,72 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqshrn\\.s16} %t | count 1
-; RUN: grep {vqshrn\\.s32} %t | count 1
-; RUN: grep {vqshrn\\.s64} %t | count 1
-; RUN: grep {vqshrn\\.u16} %t | count 1
-; RUN: grep {vqshrn\\.u32} %t | count 1
-; RUN: grep {vqshrn\\.u64} %t | count 1
-; RUN: grep {vqshrun\\.s16} %t | count 1
-; RUN: grep {vqshrun\\.s32} %t | count 1
-; RUN: grep {vqshrun\\.s64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrns8:
+;CHECK: vqshrn.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrns16:
+;CHECK: vqshrn.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrns32:
+;CHECK: vqshrn.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrnu8:
+;CHECK: vqshrn.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrnu16:
+;CHECK: vqshrn.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrnu32:
+;CHECK: vqshrn.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshruns8:
+;CHECK: vqshrun.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshruns16:
+;CHECK: vqshrun.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshruns32:
+;CHECK: vqshrun.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
@@ -74,3 +83,87 @@ declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind
declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
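+; Saturating rounding narrowing right shifts: vqrshrn (signed and unsigned)
+; and vqrshrun (signed input, unsigned result).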
+define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrns8:
+;CHECK: vqrshrn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrns16:
+;CHECK: vqrshrn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrns32:
+;CHECK: vqrshrn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrnu8:
+;CHECK: vqrshrn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrnu16:
+;CHECK: vqrshrn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrnu32:
+;CHECK: vqrshrn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshruns8:
+;CHECK: vqrshrun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshruns16:
+;CHECK: vqrshrun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshruns32:
+;CHECK: vqrshrun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
index 07052f7..4231fca 100644
--- a/test/CodeGen/ARM/vqsub.ll
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqsub\\.s8} %t | count 2
-; RUN: grep {vqsub\\.s16} %t | count 2
-; RUN: grep {vqsub\\.s32} %t | count 2
-; RUN: grep {vqsub\\.s64} %t | count 2
-; RUN: grep {vqsub\\.u8} %t | count 2
-; RUN: grep {vqsub\\.u16} %t | count 2
-; RUN: grep {vqsub\\.u32} %t | count 2
-; RUN: grep {vqsub\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubs8:
+;CHECK: vqsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubs16:
+;CHECK: vqsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubs32:
+;CHECK: vqsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubs64:
+;CHECK: vqsub.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubu8:
+;CHECK: vqsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubu16:
+;CHECK: vqsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubu32:
+;CHECK: vqsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubu64:
+;CHECK: vqsub.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQs8:
+;CHECK: vqsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQs16:
+;CHECK: vqsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQs32:
+;CHECK: vqsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQs64:
+;CHECK: vqsub.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQu8:
+;CHECK: vqsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQu16:
+;CHECK: vqsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQu32:
+;CHECK: vqsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQu64:
+;CHECK: vqsub.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
new file mode 100644
index 0000000..99989e9
--- /dev/null
+++ b/test/CodeGen/ARM/vrec.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
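+; Reciprocal estimate and step (vrecpe, vrecps) and reciprocal square-root
+; estimate and step (vrsqrte, vrsqrts); the estimates cover both u32 and f32
+; element types.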
+define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
+;CHECK: vrecpei32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrecpeQi32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
+;CHECK: vrecpef32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
+;CHECK: vrecpeQf32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrecpsf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrecpsQf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
+;CHECK: vrsqrtei32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrsqrteQi32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
+;CHECK: vrsqrtef32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
+;CHECK: vrsqrteQf32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrsqrtsf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
new file mode 100644
index 0000000..f0a04a4
--- /dev/null
+++ b/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
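+; Element reversal within 64-, 32-, and 16-bit groups (vrev64, vrev32, vrev16),
+; all matched from shufflevector masks.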
+define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8:
+;CHECK: vrev64.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev64D16:
+;CHECK: vrev64.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK: test_vrev64D32:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %tmp2
+}
+
+define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK: test_vrev64Df:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x float> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev64Q8:
+;CHECK: vrev64.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev64Q16:
+;CHECK: vrev64.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK: test_vrev64Q32:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %tmp2
+}
+
+define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK: test_vrev64Qf:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev32D8:
+;CHECK: vrev32.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev32D16:
+;CHECK: vrev32.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev32Q8:
+;CHECK: vrev32.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16:
+;CHECK: vrev32.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev16D8:
+;CHECK: vrev16.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev16Q8:
+;CHECK: vrev16.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i8> %tmp2
+}
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index 8c5c4aa..f3cbec7 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,30 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 4
-; RUN: grep {vshl\\.u16} %t | count 4
-; RUN: grep {vshl\\.u32} %t | count 4
-; RUN: grep {vshl\\.u64} %t | count 4
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vneg\\.s8} %t | count 4
-; RUN: grep {vneg\\.s16} %t | count 4
-; RUN: grep {vneg\\.s32} %t | count 4
-; RUN: grep {vsub\\.i64} %t | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
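+; Generic IR shifts. NEON has no variable right-shift instruction, so variable
+; lshr/ashr are lowered to a negate of the shift amount (vneg, or vsub.i64 for
+; 64-bit elements) followed by vshl.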
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = shl <8 x i8> %tmp1, %tmp2
@@ -32,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = shl <4 x i16> %tmp1, %tmp2
@@ -39,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = shl <2 x i32> %tmp1, %tmp2
@@ -46,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = shl <1 x i64> %tmp1, %tmp2
@@ -53,30 +37,40 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <8 x i8> %tmp2
}
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
ret <4 x i16> %tmp2
}
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
ret <2 x i32> %tmp2
}
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
ret <1 x i64> %tmp2
}
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = shl <16 x i8> %tmp1, %tmp2
@@ -84,6 +78,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = shl <8 x i16> %tmp1, %tmp2
@@ -91,6 +87,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = shl <4 x i32> %tmp1, %tmp2
@@ -98,6 +96,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = shl <2 x i64> %tmp1, %tmp2
@@ -105,30 +105,41 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <16 x i8> %tmp2
}
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
ret <2 x i64> %tmp2
}
define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vlshru8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp1, %tmp2
@@ -136,6 +147,9 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vlshru16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp1, %tmp2
@@ -143,6 +157,9 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vlshru32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp1, %tmp2
@@ -150,6 +167,9 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vlshru64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = lshr <1 x i64> %tmp1, %tmp2
@@ -157,30 +177,41 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
+;CHECK: vlshri8:
+;CHECK: vshr.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <8 x i8> %tmp2
}
define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
+;CHECK: vlshri16:
+;CHECK: vshr.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
ret <4 x i16> %tmp2
}
define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
+;CHECK: vlshri32:
+;CHECK: vshr.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
ret <2 x i32> %tmp2
}
define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
+;CHECK: vlshri64:
+;CHECK: vshr.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
ret <1 x i64> %tmp2
}
define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vlshrQu8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp1, %tmp2
@@ -188,6 +219,9 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vlshrQu16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp1, %tmp2
@@ -195,6 +229,9 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vlshrQu32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp1, %tmp2
@@ -202,6 +239,9 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vlshrQu64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp1, %tmp2
@@ -209,30 +249,48 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vlshrQi8:
+;CHECK: vshr.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <16 x i8> %tmp2
}
define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vlshrQi16:
+;CHECK: vshr.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
ret <8 x i16> %tmp2
}
define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vlshrQi32:
+;CHECK: vshr.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
ret <4 x i32> %tmp2
}
define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vlshrQi64:
+;CHECK: vshr.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
ret <2 x i64> %tmp2
}
+; Example that requires splitting and expanding a vector shift.
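+; (No CHECK lines here; this only needs to compile, exercising legalization of
+; the v2i64 shift.)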
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
+entry:
+ %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %shr
+}
+
define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vashrs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp1, %tmp2
@@ -240,6 +298,9 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vashrs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp1, %tmp2
@@ -247,6 +308,9 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vashrs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp1, %tmp2
@@ -254,6 +318,9 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vashrs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = ashr <1 x i64> %tmp1, %tmp2
@@ -261,30 +328,41 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
+;CHECK: vashri8:
+;CHECK: vshr.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <8 x i8> %tmp2
}
define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
+;CHECK: vashri16:
+;CHECK: vshr.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
ret <4 x i16> %tmp2
}
define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
+;CHECK: vashri32:
+;CHECK: vshr.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
ret <2 x i32> %tmp2
}
define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
+;CHECK: vashri64:
+;CHECK: vshr.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
ret <1 x i64> %tmp2
}
define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vashrQs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp1, %tmp2
@@ -292,6 +370,9 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vashrQs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp1, %tmp2
@@ -299,6 +380,9 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vashrQs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp1, %tmp2
@@ -306,6 +390,9 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vashrQs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp1, %tmp2
@@ -313,24 +400,32 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vashrQi8:
+;CHECK: vshr.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
ret <16 x i8> %tmp2
}
define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vashrQi16:
+;CHECK: vshr.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
ret <8 x i16> %tmp2
}
define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vashrQi32:
+;CHECK: vshr.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
ret <4 x i32> %tmp2
}
define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vashrQi64:
+;CHECK: vshr.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
ret <2 x i64> %tmp2
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
index cb7cbb8..3a4f857 100644
--- a/test/CodeGen/ARM/vshiftins.ll
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsli\\.8} %t | count 2
-; RUN: grep {vsli\\.16} %t | count 2
-; RUN: grep {vsli\\.32} %t | count 2
-; RUN: grep {vsli\\.64} %t | count 2
-; RUN: grep {vsri\\.8} %t | count 2
-; RUN: grep {vsri\\.16} %t | count 2
-; RUN: grep {vsri\\.32} %t | count 2
-; RUN: grep {vsri\\.64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
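+; The vshiftins intrinsic selects vsli for a positive constant shift amount and
+; vsri for a negative one.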
define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsli8:
+;CHECK: vsli.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -16,6 +10,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsli16:
+;CHECK: vsli.16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -23,6 +19,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsli32:
+;CHECK: vsli.32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >)
@@ -30,6 +28,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsli64:
+;CHECK: vsli.64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >)
@@ -37,6 +37,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsliQ8:
+;CHECK: vsli.8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -44,6 +46,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsliQ16:
+;CHECK: vsli.16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -51,6 +55,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsliQ32:
+;CHECK: vsli.32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -58,6 +64,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsliQ64:
+;CHECK: vsli.64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >)
@@ -65,6 +73,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsri8:
+;CHECK: vsri.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -72,6 +82,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsri16:
+;CHECK: vsri.16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -79,6 +91,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsri32:
+;CHECK: vsri.32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -86,6 +100,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsri64:
+;CHECK: vsri.64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -93,6 +109,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsriQ8:
+;CHECK: vsri.8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -100,6 +118,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsriQ16:
+;CHECK: vsri.16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -107,6 +127,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsriQ32:
+;CHECK: vsri.32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -114,6 +136,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsriQ64:
+;CHECK: vsri.64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
index 993126e..818e71b 100644
--- a/test/CodeGen/ARM/vshl.ll
+++ b/test/CodeGen/ARM/vshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 2
-; RUN: grep {vshl\\.u16} %t | count 2
-; RUN: grep {vshl\\.u32} %t | count 2
-; RUN: grep {vshl\\.u64} %t | count 2
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
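+; NEON shift intrinsics (vshifts/vshiftu). With a constant shift amount, a
+; positive value selects vshl.i* and a negative value selects vshr.s*/vshr.u*.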
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -28,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -35,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -42,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -49,6 +37,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshlu8:
+;CHECK: vshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -56,6 +46,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshlu16:
+;CHECK: vshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -63,6 +55,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshlu32:
+;CHECK: vshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,6 +64,8 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshlu64:
+;CHECK: vshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -77,6 +73,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -84,6 +82,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,6 +91,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -98,6 +100,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -105,6 +109,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQu8:
+;CHECK: vshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -112,6 +118,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQu16:
+;CHECK: vshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -119,6 +127,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQu32:
+;CHECK: vshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -126,6 +136,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQu64:
+;CHECK: vshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -136,48 +148,64 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; Test a mix of both signed and unsigned intrinsics.
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
@@ -186,96 +214,128 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
; Right shift by immediate:
define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vshrs8:
+;CHECK: vshr.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vshrs16:
+;CHECK: vshr.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vshrs32:
+;CHECK: vshr.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vshrs64:
+;CHECK: vshr.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
+;CHECK: vshru8:
+;CHECK: vshr.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
+;CHECK: vshru16:
+;CHECK: vshr.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
+;CHECK: vshru32:
+;CHECK: vshr.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
+;CHECK: vshru64:
+;CHECK: vshr.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQs8:
+;CHECK: vshr.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQs16:
+;CHECK: vshr.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQs32:
+;CHECK: vshr.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQs64:
+;CHECK: vshr.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
}
define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQu8:
+;CHECK: vshr.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQu16:
+;CHECK: vshr.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQu32:
+;CHECK: vshr.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQu64:
+;CHECK: vshr.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
@@ -300,3 +360,295 @@ declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind re
declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshls8:
+;CHECK: vrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshls16:
+;CHECK: vrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshls32:
+;CHECK: vrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshls64:
+;CHECK: vrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshlu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshlu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshlu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshlu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQs8:
+;CHECK: vrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQs16:
+;CHECK: vrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQs32:
+;CHECK: vrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQs64:
+;CHECK: vrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vrshrs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vrshrs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vrshrs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vrshrs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
+;CHECK: vrshru8:
+;CHECK: vrshr.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
+;CHECK: vrshru16:
+;CHECK: vrshr.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
+;CHECK: vrshru32:
+;CHECK: vrshr.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
+;CHECK: vrshru64:
+;CHECK: vrshr.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQu8:
+;CHECK: vrshr.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQu16:
+;CHECK: vrshr.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQu32:
+;CHECK: vrshr.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQu64:
+;CHECK: vrshr.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
index f81c09a..8e85b98 100644
--- a/test/CodeGen/ARM/vshll.ll
+++ b/test/CodeGen/ARM/vshll.ll
@@ -1,45 +1,48 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshll\\.s8} %t | count 1
-; RUN: grep {vshll\\.s16} %t | count 1
-; RUN: grep {vshll\\.s32} %t | count 1
-; RUN: grep {vshll\\.u8} %t | count 1
-; RUN: grep {vshll\\.u16} %t | count 1
-; RUN: grep {vshll\\.u32} %t | count 1
-; RUN: grep {vshll\\.i8} %t | count 1
-; RUN: grep {vshll\\.i16} %t | count 1
-; RUN: grep {vshll\\.i32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
+;CHECK: vshlls8:
+;CHECK: vshll.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
+;CHECK: vshlls16:
+;CHECK: vshll.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
+;CHECK: vshlls32:
+;CHECK: vshll.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i64> %tmp2
}
define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
+;CHECK: vshllu8:
+;CHECK: vshll.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
+;CHECK: vshllu16:
+;CHECK: vshll.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
+;CHECK: vshllu32:
+;CHECK: vshll.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i64> %tmp2
@@ -48,18 +51,24 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
; The following tests use the maximum shift count, so the signedness is
; irrelevant. Test both signed and unsigned versions.
define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
+;CHECK: vshlli8:
+;CHECK: vshll.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
ret <8 x i16> %tmp2
}
define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
+;CHECK: vshlli16:
+;CHECK: vshll.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >)
ret <4 x i32> %tmp2
}
define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
+;CHECK: vshlli32:
+;CHECK: vshll.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >)
ret <2 x i64> %tmp2
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index bc640cb..e2544f4 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -1,21 +1,24 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshrn\\.i16} %t | count 1
-; RUN: grep {vshrn\\.i32} %t | count 1
-; RUN: grep {vshrn\\.i64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vshrns8:
+;CHECK: vshrn.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vshrns16:
+;CHECK: vshrn.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vshrns32:
+;CHECK: vshrn.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
@@ -24,3 +27,31 @@ define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vrshrns8:
+;CHECK: vrshrn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vrshrns16:
+;CHECK: vrshrn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vrshrns32:
+;CHECK: vrshrn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
index e2829dc..acb672d 100644
--- a/test/CodeGen/ARM/vsra.ll
+++ b/test/CodeGen/ARM/vsra.ll
@@ -1,22 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsra\\.s8} %t | count 2
-; RUN: grep {vsra\\.s16} %t | count 2
-; RUN: grep {vsra\\.s32} %t | count 2
-; RUN: grep {vsra\\.s64} %t | count 2
-; RUN: grep {vsra\\.u8} %t | count 2
-; RUN: grep {vsra\\.u16} %t | count 2
-; RUN: grep {vsra\\.u32} %t | count 2
-; RUN: grep {vsra\\.u64} %t | count 2
-; RUN: grep {vrsra\\.s8} %t | count 2
-; RUN: grep {vrsra\\.s16} %t | count 2
-; RUN: grep {vrsra\\.s32} %t | count 2
-; RUN: grep {vrsra\\.s64} %t | count 2
-; RUN: grep {vrsra\\.u8} %t | count 2
-; RUN: grep {vrsra\\.u16} %t | count 2
-; RUN: grep {vrsra\\.u32} %t | count 2
-; RUN: grep {vrsra\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsras8:
+;CHECK: vsra.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -25,6 +11,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsras16:
+;CHECK: vsra.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
@@ -33,6 +21,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsras32:
+;CHECK: vsra.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 >
@@ -41,6 +31,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsras64:
+;CHECK: vsra.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = ashr <1 x i64> %tmp2, < i64 64 >
@@ -49,6 +41,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQs8:
+;CHECK: vsra.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -57,6 +51,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQs16:
+;CHECK: vsra.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -65,6 +61,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQs32:
+;CHECK: vsra.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
@@ -73,6 +71,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQs64:
+;CHECK: vsra.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 >
@@ -81,6 +81,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsrau8:
+;CHECK: vsra.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -89,6 +91,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsrau16:
+;CHECK: vsra.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
@@ -97,6 +101,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsrau32:
+;CHECK: vsra.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 >
@@ -105,6 +111,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsrau64:
+;CHECK: vsra.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = lshr <1 x i64> %tmp2, < i64 64 >
@@ -113,6 +121,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQu8:
+;CHECK: vsra.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -121,6 +131,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQu16:
+;CHECK: vsra.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -129,6 +141,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQu32:
+;CHECK: vsra.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
@@ -137,6 +151,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQu64:
+;CHECK: vsra.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 >
@@ -145,6 +161,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsras8:
+;CHECK: vrsra.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -153,6 +171,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsras16:
+;CHECK: vrsra.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -161,6 +181,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsras32:
+;CHECK: vrsra.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -169,6 +191,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsras64:
+;CHECK: vrsra.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -177,6 +201,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsrau8:
+;CHECK: vrsra.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -185,6 +211,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsrau16:
+;CHECK: vrsra.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -193,6 +221,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsrau32:
+;CHECK: vrsra.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -201,6 +231,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsrau64:
+;CHECK: vrsra.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -209,6 +241,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQs8:
+;CHECK: vrsra.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -217,6 +251,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQs16:
+;CHECK: vrsra.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -225,6 +261,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQs32:
+;CHECK: vrsra.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -233,6 +271,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQs64:
+;CHECK: vrsra.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
@@ -241,6 +281,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQu8:
+;CHECK: vrsra.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -249,6 +291,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQu16:
+;CHECK: vrsra.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -257,6 +301,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQu32:
+;CHECK: vrsra.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -265,6 +311,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQu64:
+;CHECK: vrsra.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
new file mode 100644
index 0000000..602b124
--- /dev/null
+++ b/test/CodeGen/ARM/vst1.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst1i8:
+;CHECK: vst1.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst1i16:
+;CHECK: vst1.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst1i32:
+;CHECK: vst1.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1f:
+;CHECK: vst1.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst1i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1Qi8:
+;CHECK: vst1.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1Qi16:
+;CHECK: vst1.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1Qi32:
+;CHECK: vst1.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1Qf:
+;CHECK: vst1.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
+ ret void
+}
+
+define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
+;CHECK: vst1Qi64:
+;CHECK: vst1.64
+ %tmp1 = load <2 x i64>* %B
+ call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
new file mode 100644
index 0000000..17d6bee
--- /dev/null
+++ b/test/CodeGen/ARM/vst2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2i8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2i16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2i32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2f:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst2Qi8:
+;CHECK: vst2.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2Qi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2Qi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2Qf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
new file mode 100644
index 0000000..a831a0c
--- /dev/null
+++ b/test/CodeGen/ARM/vst3.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3i8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3i16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3f:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst3i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst3Qi8:
+;CHECK: vst3.8
+;CHECK: vst3.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16:
+;CHECK: vst3.16
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3Qi32:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3Qf:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
new file mode 100644
index 0000000..d92c017
--- /dev/null
+++ b/test/CodeGen/ARM/vst4.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4i8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4i16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4i32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4f:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst4i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst4Qi8:
+;CHECK: vst4.8
+;CHECK: vst4.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4Qi16:
+;CHECK: vst4.16
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4Qi32:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
new file mode 100644
index 0000000..3bfb14f
--- /dev/null
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -0,0 +1,197 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2lanei8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2lanei16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2lanei32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2lanef:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2laneQi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2laneQi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2laneQf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3lanei8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3lanei16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3lanei32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3lanef:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3laneQi16:
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+ ret void
+}
+
+define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32:
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+ ret void
+}
+
+define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3laneQf:
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+
+
+define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4lanei16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4lanei32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4lanef:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4laneQi16:
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+ ret void
+}
+
+define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4laneQi32:
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4laneQf:
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 85dea41..8f0055f 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsub\\.i8} %t | count 2
-; RUN: grep {vsub\\.i16} %t | count 2
-; RUN: grep {vsub\\.i32} %t | count 2
-; RUN: grep {vsub\\.i64} %t | count 2
-; RUN: grep {vsub\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubi8:
+;CHECK: vsub.i8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = sub <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubi16:
+;CHECK: vsub.i16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = sub <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubi32:
+;CHECK: vsub.i32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = sub <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsubi64:
+;CHECK: vsub.i64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = sub <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vsubf32:
+;CHECK: vsub.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = sub <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsubQi8:
+;CHECK: vsub.i8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = sub <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubQi16:
+;CHECK: vsub.i16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = sub <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubQi32:
+;CHECK: vsub.i32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = sub <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubQi64:
+;CHECK: vsub.i64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = sub <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vsubQf32:
+;CHECK: vsub.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = sub <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
+
+define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubhni16:
+;CHECK: vsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubhni32:
+;CHECK: vsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubhni64:
+;CHECK: vsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsubhni16:
+;CHECK: vrsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsubhni32:
+;CHECK: vrsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsubhni64:
+;CHECK: vrsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubls8:
+;CHECK: vsubl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubls16:
+;CHECK: vsubl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubls32:
+;CHECK: vsubl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsublu8:
+;CHECK: vsubl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsublu16:
+;CHECK: vsubl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsublu32:
+;CHECK: vsubl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubws8:
+;CHECK: vsubw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubws16:
+;CHECK: vsubw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubws32:
+;CHECK: vsubw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubwu8:
+;CHECK: vsubw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubwu16:
+;CHECK: vsubw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubwu32:
+;CHECK: vsubw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
new file mode 100644
index 0000000..9264987
--- /dev/null
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+
+define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtbl1:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
+;CHECK: vtbl2:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
+;CHECK: vtbl3:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
+;CHECK: vtbl4:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx1:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx2:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = load <8 x i8>* %C
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx3:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = load <8 x i8>* %C
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx4:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = load <8 x i8>* %C
+ %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+ ret <8 x i8> %tmp8
+}
+
+declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
new file mode 100644
index 0000000..5122b09
--- /dev/null
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtrni16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtrni32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vtrnf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtrnQi8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtrnQi32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vtrnQf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
new file mode 100644
index 0000000..e531718
--- /dev/null
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vuzpi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vuzpQi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vuzpQi32:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vuzpQf:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
new file mode 100644
index 0000000..32f7e0d
--- /dev/null
+++ b/test/CodeGen/ARM/vzip.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vzipi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vzipQi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vzipQi32:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vzipQf:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll
index dadd1b9..5ac4b8c 100644
--- a/test/CodeGen/ARM/weak.ll
+++ b/test/CodeGen/ARM/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*h
+; RUN: llc < %s -march=arm | grep .weak.*f
+; RUN: llc < %s -march=arm | grep .weak.*h
define weak i32 @f() {
entry:
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index a57a767..cf327bb 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak
+; RUN: llc < %s -march=arm | grep .weak
define i32 @f(i32 %a) {
entry:
diff --git a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
index c96b14a..87d9928 100644
--- a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
+++ b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
@@ -1,5 +1,5 @@
; There should be exactly two calls here (memset and malloc), no more.
-; RUN: llvm-as < %s | llc -march=alpha | grep jsr | count 2
+; RUN: llc < %s -march=alpha | grep jsr | count 2
%typedef.bc_struct = type opaque
declare void @llvm.memset.i64(i8*, i8, i64, i32)
diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
index b45c2a4..4b3d022 100644
--- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
+++ b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
@@ -1,5 +1,5 @@
; This shouldn't crash
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
@.str_4 = external global [44 x i8] ; <[44 x i8]*> [#uses=0]
diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
index f89997e..65d2a8d 100644
--- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
+++ b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
@@ -1,5 +1,5 @@
; The global symbol should be legalized
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
%struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
index 05ebe1e..45587f0 100644
--- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
+++ b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
@@ -1,5 +1,5 @@
; This shouldn't crash
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll
index f3ff5b1..2b28903 100644
--- a/test/CodeGen/Alpha/2006-04-04-zextload.ll
+++ b/test/CodeGen/Alpha/2006-04-04-zextload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
index 6b55047..5d31bc3 100644
--- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
+++ b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll
index 3f42eda..14e0bcc 100644
--- a/test/CodeGen/Alpha/2006-11-01-vastart.ll
+++ b/test/CodeGen/Alpha/2006-11-01-vastart.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
index 3eac13d..b537e25 100644
--- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
+++ b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
;FIXME: this should produce no mul inst. But not crashing will have to do for now
diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
index 9d814da..1a4b40e 100644
--- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
+++ b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll
index e6e5746..8b9b603 100644
--- a/test/CodeGen/Alpha/2008-11-12-Add128.ll
+++ b/test/CodeGen/Alpha/2008-11-12-Add128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3044
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
new file mode 100644
index 0000000..cfbf7fc
--- /dev/null
+++ b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=alpha
+
+define i1 @a(float %x) {
+ %r = fcmp ult float %x, 1.0
+ ret i1 %r
+}
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
index 260584b..24a7418 100644
--- a/test/CodeGen/Alpha/add.ll
+++ b/test/CodeGen/Alpha/add.ll
@@ -1,6 +1,6 @@
;test all the shifted and signextending adds and subs with and without consts
;
-; RUN: llvm-as < %s | llc -march=alpha -o %t.s -f
+; RUN: llc < %s -march=alpha -o %t.s
; RUN: grep { addl} %t.s | count 2
; RUN: grep { addq} %t.s | count 2
; RUN: grep { subl} %t.s | count 2
diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll
index 61d0208..fa3b949 100644
--- a/test/CodeGen/Alpha/add128.ll
+++ b/test/CodeGen/Alpha/add128.ll
@@ -1,6 +1,6 @@
;test for ADDC and ADDE expansion
;
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
define i128 @add128(i128 %x, i128 %y) {
entry:
diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll
index 6e63511..9f00350 100644
--- a/test/CodeGen/Alpha/bic.ll
+++ b/test/CodeGen/Alpha/bic.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the bic instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep {bic}
+; RUN: llc < %s -march=alpha | grep {bic}
define i64 @bar(i64 %x, i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll
index d461857..14f6b46 100644
--- a/test/CodeGen/Alpha/bsr.ll
+++ b/test/CodeGen/Alpha/bsr.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens the bsr instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep bsr
+; RUN: llc < %s -march=alpha | grep bsr
define internal i64 @abc(i32 %x) {
%tmp.2 = add i32 %x, -1 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll
index ee8cda8..24e97a9 100644
--- a/test/CodeGen/Alpha/call_adj.ll
+++ b/test/CodeGen/Alpha/call_adj.ll
@@ -1,5 +1,5 @@
;All this should do is not crash
-;RUN: llvm-as < %s | llc -march=alpha
+;RUN: llc < %s -march=alpha
target datalayout = "e-p:64:64"
target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll
index 08e1dad..9b655f0 100644
--- a/test/CodeGen/Alpha/cmov.ll
+++ b/test/CodeGen/Alpha/cmov.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=alpha | not grep cmovlt
-; RUN: llvm-as < %s | llc -march=alpha | grep cmoveq
+; RUN: llc < %s -march=alpha | not grep cmovlt
+; RUN: llc < %s -march=alpha | grep cmoveq
define i64 @cmov_lt(i64 %a, i64 %c) {
entry:
diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll
index 9b83215..e88d2ee 100644
--- a/test/CodeGen/Alpha/cmpbge.ll
+++ b/test/CodeGen/Alpha/cmpbge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep cmpbge | count 2
+; RUN: llc < %s -march=alpha | grep cmpbge | count 2
define i1 @test1(i64 %A, i64 %B) {
%C = and i64 %A, 255 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll
index 83d97b5..aa1588a 100644
--- a/test/CodeGen/Alpha/ctlz.ll
+++ b/test/CodeGen/Alpha/ctlz.ll
@@ -1,8 +1,8 @@
; Make sure this testcase codegens to the ctlz instruction
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | not grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | not grep -i ctlz
+; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz
+; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz
+; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz
+; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz
declare i8 @llvm.ctlz.i8(i8)
diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll
index 56027dd..230e096 100644
--- a/test/CodeGen/Alpha/ctlz_e.ll
+++ b/test/CodeGen/Alpha/ctlz_e.ll
@@ -1,5 +1,5 @@
; Make sure this testcase does not use ctpop
-; RUN: llvm-as < %s | llc -march=alpha | not grep -i ctpop
+; RUN: llc < %s -march=alpha | not grep -i ctpop
declare i64 @llvm.ctlz.i64(i64)
diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll
index a528d72..f887882 100644
--- a/test/CodeGen/Alpha/ctpop.ll
+++ b/test/CodeGen/Alpha/ctpop.ll
@@ -1,10 +1,10 @@
; Make sure this testcase codegens to the ctpop instruction
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | \
+; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop
+; RUN: llc < %s -march=alpha -mattr=+CIX | \
; RUN: grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | \
+; RUN: llc < %s -march=alpha -mcpu=ev6 | \
; RUN: not grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | \
+; RUN: llc < %s -march=alpha -mattr=-CIX | \
; RUN: not grep -i ctpop
declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll
index 2539d72..b3413d6 100644
--- a/test/CodeGen/Alpha/eqv.ll
+++ b/test/CodeGen/Alpha/eqv.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the eqv instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep eqv
+; RUN: llc < %s -march=alpha | grep eqv
define i64 @bar(i64 %x, i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
index 7af8134..ffeafbd 100644
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ b/test/CodeGen/Alpha/i32_sub_1.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the ctpop instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep -i {subl \$16,1,\$0}
+; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
define i32 @foo(i32 signext %x) signext {
diff --git a/test/CodeGen/Alpha/illegal-element-type.ll b/test/CodeGen/Alpha/illegal-element-type.ll
index c95d571..4cf80dee 100644
--- a/test/CodeGen/Alpha/illegal-element-type.ll
+++ b/test/CodeGen/Alpha/illegal-element-type.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=alphaev6-unknown-linux-gnu
+; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu
define void @foo() {
entry:
diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll
index d7b6116..917c932 100644
--- a/test/CodeGen/Alpha/jmp_table.ll
+++ b/test/CodeGen/Alpha/jmp_table.ll
@@ -1,9 +1,9 @@
; try to check that we have the most important instructions, which shouldn't
; appear otherwise
-; RUN: llvm-as < %s | llc -march=alpha | grep jmp
-; RUN: llvm-as < %s | llc -march=alpha | grep gprel32
-; RUN: llvm-as < %s | llc -march=alpha | grep ldl
-; RUN: llvm-as < %s | llc -march=alpha | grep rodata
+; RUN: llc < %s -march=alpha | grep jmp
+; RUN: llc < %s -march=alpha | grep gprel32
+; RUN: llc < %s -march=alpha | grep ldl
+; RUN: llc < %s -march=alpha | grep rodata
; END.
target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
index 50c245f..93e8b1b 100644
--- a/test/CodeGen/Alpha/mb.ll
+++ b/test/CodeGen/Alpha/mb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep mb
+; RUN: llc < %s -march=alpha | grep mb
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll
index b069fea..daf8409 100644
--- a/test/CodeGen/Alpha/mul128.ll
+++ b/test/CodeGen/Alpha/mul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
define i128 @__mulvdi3(i128 %a, i128 %b) nounwind {
entry:
diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll
index 5af73a1..4075dd6 100644
--- a/test/CodeGen/Alpha/mul5.ll
+++ b/test/CodeGen/Alpha/mul5.ll
@@ -1,5 +1,5 @@
; Make sure this testcase does not use mulq
-; RUN: llvm-as < %s | llc -march=alpha | not grep -i mul
+; RUN: llc < %s -march=alpha | not grep -i mul
define i64 @foo1(i64 %x) {
entry:
diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll
index ddaed4a..0db767f 100644
--- a/test/CodeGen/Alpha/neg1.ll
+++ b/test/CodeGen/Alpha/neg1.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the lda -1 instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep {\\-1}
+; RUN: llc < %s -march=alpha | grep {\\-1}
define i64 @bar() {
entry:
diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll
index cea9f6b..4f0a5c2 100644
--- a/test/CodeGen/Alpha/not.ll
+++ b/test/CodeGen/Alpha/not.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the ornot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep eqv
+; RUN: llc < %s -march=alpha | grep eqv
define i64 @bar(i64 %x) {
entry:
diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll
index b8d350d..f930e34 100644
--- a/test/CodeGen/Alpha/ornot.ll
+++ b/test/CodeGen/Alpha/ornot.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the ornot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep ornot
+; RUN: llc < %s -march=alpha | grep ornot
define i64 @bar(i64 %x, i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
index 2d9ed1e..96ab4eb 100644
--- a/test/CodeGen/Alpha/private.ll
+++ b/test/CodeGen/Alpha/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=alpha > %t
+; RUN: llc < %s -march=alpha > %t
; RUN: grep \\\$foo: %t
; RUN: grep bsr.*\\\$\\\$foo %t
; RUN: grep \\\$baz: %t
diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll
index 193a47f..d6665b5 100644
--- a/test/CodeGen/Alpha/rpcc.ll
+++ b/test/CodeGen/Alpha/rpcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep rpcc
+; RUN: llc < %s -march=alpha | grep rpcc
declare i64 @llvm.readcyclecounter()
diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll
index 2344833..3042ef3 100644
--- a/test/CodeGen/Alpha/srl_and.ll
+++ b/test/CodeGen/Alpha/srl_and.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i64 @foo(i64 %y) {
entry:
diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll
index cb18559..d26404b 100644
--- a/test/CodeGen/Alpha/sub128.ll
+++ b/test/CodeGen/Alpha/sub128.ll
@@ -1,6 +1,6 @@
;test for SUBC and SUBE expansion
;
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
define i128 @sub128(i128 %x, i128 %y) {
entry:
diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll
index e00e6d7..ff04de9 100644
--- a/test/CodeGen/Alpha/weak.ll
+++ b/test/CodeGen/Alpha/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*h
+; RUN: llc < %s -march=alpha | grep .weak.*f
+; RUN: llc < %s -march=alpha | grep .weak.*h
define weak i32 @f() {
entry:
diff --git a/test/CodeGen/Alpha/wmb.ll b/test/CodeGen/Alpha/wmb.ll
index f745cd5..a3e2ccf 100644
--- a/test/CodeGen/Alpha/wmb.ll
+++ b/test/CodeGen/Alpha/wmb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep wmb
+; RUN: llc < %s -march=alpha | grep wmb
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
index 7fec19b..d00984a 100644
--- a/test/CodeGen/Alpha/zapnot.ll
+++ b/test/CodeGen/Alpha/zapnot.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the bic instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i16 @foo(i64 %y) zeroext {
diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll
index 6a33ca2..cd3caae 100644
--- a/test/CodeGen/Alpha/zapnot2.ll
+++ b/test/CodeGen/Alpha/zapnot2.ll
@@ -1,5 +1,5 @@
; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i64 @bar(i64 %x) {
entry:
diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll
index 26aab37..f02961f 100644
--- a/test/CodeGen/Alpha/zapnot3.ll
+++ b/test/CodeGen/Alpha/zapnot3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
;demanded bits mess up this mask in a hard to fix way
;define i64 @foo(i64 %y) {
diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll
index 1be3ca2..89beeef 100644
--- a/test/CodeGen/Alpha/zapnot4.ll
+++ b/test/CodeGen/Alpha/zapnot4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
define i64 @foo(i64 %y) {
%tmp = shl i64 %y, 3 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
new file mode 100644
index 0000000..3ee5e8d
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
+
+; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
+; super-register is illegally extended.
+
+define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
+ %y1 = add i16 %x1, 1
+ %y2 = add i16 %x2, 2
+ %y3 = add i16 %x3, 3
+ %y4 = add i16 %x4, 4
+ %z12 = add i16 %y1, %y2
+ %z34 = add i16 %y3, %y4
+ %p = add i16 %z12, %z34
+ ret i16 %p
+}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
new file mode 100644
index 0000000..e5d1637
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+declare i16 @llvm.cttz.i16(i16) nounwind readnone
+
+declare i8 @llvm.cttz.i8(i8) nounwind readnone
+
+define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
+ %a = call i8 @llvm.cttz.i8(i8 %A) ; <i8> [#uses=1]
+ %b = call i16 @llvm.cttz.i16(i16 %B) ; <i16> [#uses=1]
+ %d = call i64 @llvm.cttz.i64(i64 %D) ; <i64> [#uses=1]
+ store i8 %a, i8* %AP
+ store i16 %b, i16* %BP
+ store i64 %d, i64* %DP
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
new file mode 100644
index 0000000..0b731dc
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; When joining live intervals of sub-registers, an MBB live-in list is not
+; updated properly. The register scavenger asserts on an undefined register.
+
+define i32 @foo(i8 %bar) {
+entry:
+ switch i8 %bar, label %bb1203 [
+ i8 117, label %bb1204
+ i8 85, label %bb1204
+ i8 106, label %bb1204
+ ]
+
+bb1203: ; preds = %entry
+ ret i32 1
+
+bb1204: ; preds = %entry, %entry, %entry
+ ret i32 2
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
new file mode 100644
index 0000000..dcc3ea0
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; LocalRewriter can forget to transfer a <def,dead> flag when setting up call
+; argument registers. This then causes register scavenger asserts.
+
+declare i32 @printf(i8*, i32, float)
+
+define i32 @testissue(i32 %i, float %x, float %y) {
+ br label %bb1
+
+bb1: ; preds = %bb1, %0
+ %x2 = fmul float %x, 5.000000e-01 ; <float> [#uses=1]
+ %y2 = fmul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1]
+ %z2 = fadd float %x2, %y2 ; <float> [#uses=1]
+ %z3 = fadd float undef, %z2 ; <float> [#uses=1]
+ %i1 = shl i32 %i, 3 ; <i32> [#uses=1]
+ %j1 = add i32 %i, 7 ; <i32> [#uses=1]
+ %m1 = add i32 %i1, %j1 ; <i32> [#uses=2]
+ %b = icmp sle i32 %m1, 6 ; <i1> [#uses=1]
+ br i1 %b, label %bb1, label %bb2
+
+bb2: ; preds = %bb1
+ %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
new file mode 100644
index 0000000..f21da52
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+; XFAIL: *
+
+; An undef argument causes a setugt node to escape instruction selection.
+
+define void @bugt() {
+cond_next305:
+ %tmp306307 = trunc i32 undef to i8 ; <i8> [#uses=1]
+ %tmp308 = icmp ugt i8 %tmp306307, 6 ; <i1> [#uses=1]
+ br i1 %tmp308, label %bb311, label %bb314
+
+bb311: ; preds = %cond_next305
+ unreachable
+
+bb314: ; preds = %cond_next305
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
new file mode 100644
index 0000000..e982e43
--- /dev/null
+++ b/test/CodeGen/Blackfin/add-overflow.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+ type { i24, i1 } ; type %0
+
+define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
+entry:
+ %t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2) ; <%0> [#uses=1]
+ %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
+ br i1 %obit, label %carry, label %normal
+
+normal: ; preds = %entry
+ ret i1 true
+
+carry: ; preds = %entry
+ ret i1 false
+}
+
+declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
new file mode 100644
index 0000000..3311c03
--- /dev/null
+++ b/test/CodeGen/Blackfin/add.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @add(i32 %A, i32 %B) {
+ %R = add i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
new file mode 100644
index 0000000..dd56101
--- /dev/null
+++ b/test/CodeGen/Blackfin/addsub-i128.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; These functions have just the right size to annoy the register scavenger: They
+; use all the scratch registers, but not all the callee-saved registers.
+
+define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+ %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1]
+ %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1]
+ %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1]
+ %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1]
+ %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1]
+ %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1]
+ %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1]
+ %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1]
+ %tmp15 = add i128 %tmp12, %tmp5 ; <i128> [#uses=2]
+ %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1]
+ store i64 %tmp1617, i64* %RL
+ %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1]
+ %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1]
+ store i64 %tmp2122, i64* %RH
+ ret void
+}
+
+define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+ %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1]
+ %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1]
+ %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1]
+ %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1]
+ %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1]
+ %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1]
+ %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1]
+ %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1]
+ %tmp15 = sub i128 %tmp5, %tmp12 ; <i128> [#uses=2]
+ %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1]
+ store i64 %tmp1617, i64* %RL
+ %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1]
+ %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1]
+ store i64 %tmp2122, i64* %RH
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
new file mode 100644
index 0000000..c63adab
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i1.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @add(i1 %A, i1 %B) {
+ %R = add i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @sub(i1 %A, i1 %B) {
+ %R = sub i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @mul(i1 %A, i1 %B) {
+ %R = mul i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @sdiv(i1 %A, i1 %B) {
+ %R = sdiv i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @udiv(i1 %A, i1 %B) {
+ %R = udiv i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @srem(i1 %A, i1 %B) {
+ %R = srem i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @urem(i1 %A, i1 %B) {
+ %R = urem i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @and(i1 %A, i1 %B) {
+ %R = and i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @or(i1 %A, i1 %B) {
+ %R = or i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
+
+define i1 @xor(i1 %A, i1 %B) {
+ %R = xor i1 %A, %B ; <i1> [#uses=1]
+ ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
new file mode 100644
index 0000000..541e9a8
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i16.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @add(i16 %A, i16 %B) {
+ %R = add i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @sub(i16 %A, i16 %B) {
+ %R = sub i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @mul(i16 %A, i16 %B) {
+ %R = mul i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @sdiv(i16 %A, i16 %B) {
+ %R = sdiv i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @udiv(i16 %A, i16 %B) {
+ %R = udiv i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @srem(i16 %A, i16 %B) {
+ %R = srem i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @urem(i16 %A, i16 %B) {
+ %R = urem i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
new file mode 100644
index 0000000..4b5dbfc
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i32.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @add(i32 %A, i32 %B) {
+ %R = add i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @sub(i32 %A, i32 %B) {
+ %R = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @mul(i32 %A, i32 %B) {
+ %R = mul i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @sdiv(i32 %A, i32 %B) {
+ %R = sdiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @udiv(i32 %A, i32 %B) {
+ %R = udiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @srem(i32 %A, i32 %B) {
+ %R = srem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @urem(i32 %A, i32 %B) {
+ %R = urem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @and(i32 %A, i32 %B) {
+ %R = and i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @or(i32 %A, i32 %B) {
+ %R = or i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @xor(i32 %A, i32 %B) {
+ %R = xor i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
new file mode 100644
index 0000000..d4dd8e2
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i64.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i64 @add(i64 %A, i64 %B) {
+ %R = add i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @sub(i64 %A, i64 %B) {
+ %R = sub i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @mul(i64 %A, i64 %B) {
+ %R = mul i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @sdiv(i64 %A, i64 %B) {
+ %R = sdiv i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @udiv(i64 %A, i64 %B) {
+ %R = udiv i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @srem(i64 %A, i64 %B) {
+ %R = srem i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @urem(i64 %A, i64 %B) {
+ %R = urem i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @and(i64 %A, i64 %B) {
+ %R = and i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @or(i64 %A, i64 %B) {
+ %R = or i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
+
+define i64 @xor(i64 %A, i64 %B) {
+ %R = xor i64 %A, %B ; <i64> [#uses=1]
+ ret i64 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
new file mode 100644
index 0000000..2c7ce9d
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i8.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin
+
+define i8 @add(i8 %A, i8 %B) {
+ %R = add i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @sub(i8 %A, i8 %B) {
+ %R = sub i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @mul(i8 %A, i8 %B) {
+ %R = mul i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @sdiv(i8 %A, i8 %B) {
+ %R = sdiv i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @udiv(i8 %A, i8 %B) {
+ %R = udiv i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @srem(i8 %A, i8 %B) {
+ %R = srem i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @urem(i8 %A, i8 %B) {
+ %R = urem i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @and(i8 %A, i8 %B) {
+ %R = and i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @or(i8 %A, i8 %B) {
+ %R = or i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
+
+define i8 @xor(i8 %A, i8 %B) {
+ %R = xor i8 %A, %B ; <i8> [#uses=1]
+ ret i8 %R
+}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
new file mode 100644
index 0000000..85040df
--- /dev/null
+++ b/test/CodeGen/Blackfin/basictest.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define void @void(i32, i32) {
+ add i32 0, 0 ; <i32>:3 [#uses=2]
+ sub i32 0, 4 ; <i32>:4 [#uses=2]
+ br label %5
+
+; <label>:5 ; preds = %5, %2
+ add i32 %0, %1 ; <i32>:6 [#uses=2]
+ sub i32 %6, %4 ; <i32>:7 [#uses=1]
+ icmp sle i32 %7, %3 ; <i1>:8 [#uses=1]
+ br i1 %8, label %9, label %5
+
+; <label>:9 ; preds = %5
+ add i32 %0, %1 ; <i32>:10 [#uses=0]
+ sub i32 %6, %4 ; <i32>:11 [#uses=1]
+ icmp sle i32 %11, %3 ; <i1>:12 [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/burg.ll b/test/CodeGen/Blackfin/burg.ll
new file mode 100644
index 0000000..8cc3713
--- /dev/null
+++ b/test/CodeGen/Blackfin/burg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+ %IntList = type %struct.intlist*
+ %ReadFn = type i32 ()*
+ %YYSTYPE = type { %IntList }
+ %struct.intlist = type { i32, %IntList }
+@yyval = external global %YYSTYPE ; <%YYSTYPE*> [#uses=1]
+
+define i32 @yyparse() {
+bb0:
+ %reg254 = load i16* null ; <i16> [#uses=1]
+ %reg254-idxcast = sext i16 %reg254 to i64 ; <i64> [#uses=1]
+ %reg254-idxcast-scale = mul i64 %reg254-idxcast, -1 ; <i64> [#uses=1]
+ %reg254-idxcast-scale-offset = add i64 %reg254-idxcast-scale, 1 ; <i64> [#uses=1]
+ %reg261.idx1 = getelementptr %YYSTYPE* null, i64 %reg254-idxcast-scale-offset, i32 0 ; <%IntList*> [#uses=1]
+ %reg261 = load %IntList* %reg261.idx1 ; <%IntList> [#uses=1]
+ store %IntList %reg261, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
+ unreachable
+}
diff --git a/test/CodeGen/Blackfin/cmp-small-imm.ll b/test/CodeGen/Blackfin/cmp-small-imm.ll
new file mode 100644
index 0000000..e1732a8
--- /dev/null
+++ b/test/CodeGen/Blackfin/cmp-small-imm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @cmp3(i32 %A) {
+ %R = icmp uge i32 %A, 2
+ ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/cmp64.ll b/test/CodeGen/Blackfin/cmp64.ll
new file mode 100644
index 0000000..ef5bf45
--- /dev/null
+++ b/test/CodeGen/Blackfin/cmp64.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin
+
+; This test tries to use a JustCC register as a data operand for MOVEcc. It
+; calls copyRegToReg(JustCC -> DP), failing because JustCC can only be copied to
+; D. The proper solution would be to restrict the virtual register to D only.
+
+define i32 @main() {
+entry:
+ br label %loopentry
+
+loopentry:
+ %done = icmp sle i64 undef, 5
+ br i1 %done, label %loopentry, label %exit.1
+
+exit.1:
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
new file mode 100644
index 0000000..e9b66eb
--- /dev/null
+++ b/test/CodeGen/Blackfin/ct32.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.ctpop.i32(i32)
+
+define i32 @ctlztest(i32 %B) {
+ %b = call i32 @llvm.ctlz.i32( i32 %B )
+ ret i32 %b;
+}
+
+define i32 @cttztest(i32 %B) {
+ %b = call i32 @llvm.cttz.i32( i32 %B )
+ ret i32 %b;
+}
+
+define i32 @ctpoptest(i32 %B) {
+ %b = call i32 @llvm.ctpop.i32( i32 %B )
+ ret i32 %b;
+}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
new file mode 100644
index 0000000..ac4bdcf
--- /dev/null
+++ b/test/CodeGen/Blackfin/ct64.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bfin
+
+declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctlztest(i64 %B) {
+ %b = call i64 @llvm.ctlz.i64( i64 %B )
+ ret i64 %b;
+}
+
+define i64 @cttztest(i64 %B) {
+ %b = call i64 @llvm.cttz.i64( i64 %B )
+ ret i64 %b;
+}
+
+define i64 @ctpoptest(i64 %B) {
+ %b = call i64 @llvm.ctpop.i64( i64 %B )
+ ret i64 %b;
+}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
new file mode 100644
index 0000000..56a65c0
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctlz16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.ctlz.i16(i16)
+
+define i16 @ctlztest(i16 %B) {
+ %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+define i16 @ctlztest_z(i16 zeroext %B) {
+ %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
+define i16 @ctlztest_s(i16 signext %B) {
+ %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/ctlz64.ll b/test/CodeGen/Blackfin/ctlz64.ll
new file mode 100644
index 0000000..3e22f88
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctlz64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+@.str = external constant [14 x i8] ; <[14 x i8]*> [#uses=1]
+
+define i32 @main(i64 %arg) nounwind {
+entry:
+ %tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg) ; <i64> [#uses=1]
+ %tmp48 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1]
+ %tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* noalias, ...) nounwind
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
new file mode 100644
index 0000000..cbbb3d9
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctpop16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.ctpop.i16(i16)
+
+define i16 @ctpoptest(i16 %B) {
+ %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+define i16 @ctpoptest_z(i16 zeroext %B) {
+ %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
+define i16 @ctpoptest_s(i16 signext %B) {
+ %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
new file mode 100644
index 0000000..05fe9bf
--- /dev/null
+++ b/test/CodeGen/Blackfin/cttz16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.cttz.i16(i16)
+
+define i16 @cttztest(i16 %B) {
+ %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+define i16 @cttztest_z(i16 zeroext %B) {
+ %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
+define i16 @cttztest_s(i16 signext %B) {
+ %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
+ ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/cycles.ll b/test/CodeGen/Blackfin/cycles.ll
new file mode 100644
index 0000000..6451c74
--- /dev/null
+++ b/test/CodeGen/Blackfin/cycles.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin | FileCheck %s
+
+declare i64 @llvm.readcyclecounter()
+
+; CHECK: cycles
+; CHECK: cycles2
+define i64 @cyc64() {
+ %tmp.1 = call i64 @llvm.readcyclecounter()
+ ret i64 %tmp.1
+}
+
+; CHECK: cycles
+define i32 @cyc32() {
+ %tmp.1 = call i64 @llvm.readcyclecounter()
+ %s = trunc i64 %tmp.1 to i32
+ ret i32 %s
+}
diff --git a/test/CodeGen/Blackfin/dg.exp b/test/CodeGen/Blackfin/dg.exp
new file mode 100644
index 0000000..5fdbe5f
--- /dev/null
+++ b/test/CodeGen/Blackfin/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target Blackfin] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Blackfin/double-cast.ll b/test/CodeGen/Blackfin/double-cast.ll
new file mode 100644
index 0000000..815ca79
--- /dev/null
+++ b/test/CodeGen/Blackfin/double-cast.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %1 = call i32 (i8*, ...)* @printf(i8* undef, double undef)
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/frameindex.ll b/test/CodeGen/Blackfin/frameindex.ll
new file mode 100644
index 0000000..7e677fb
--- /dev/null
+++ b/test/CodeGen/Blackfin/frameindex.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32)
+
+define void @foo() {
+bb0:
+ %V = alloca [256 x i32], i32 256 ; <[256 x i32]*> [#uses=1]
+ %0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i17mem.ll b/test/CodeGen/Blackfin/i17mem.ll
new file mode 100644
index 0000000..bc5ade7
--- /dev/null
+++ b/test/CodeGen/Blackfin/i17mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i17_l = external global i17 ; <i17*> [#uses=1]
+@i17_s = external global i17 ; <i17*> [#uses=1]
+
+define void @i17_ls() nounwind {
+ %tmp = load i17* @i17_l ; <i17> [#uses=1]
+ store i17 %tmp, i17* @i17_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i1mem.ll b/test/CodeGen/Blackfin/i1mem.ll
new file mode 100644
index 0000000..cb03e3d
--- /dev/null
+++ b/test/CodeGen/Blackfin/i1mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i1_l = external global i1 ; <i1*> [#uses=1]
+@i1_s = external global i1 ; <i1*> [#uses=1]
+
+define void @i1_ls() nounwind {
+ %tmp = load i1* @i1_l ; <i1> [#uses=1]
+ store i1 %tmp, i1* @i1_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i1ops.ll b/test/CodeGen/Blackfin/i1ops.ll
new file mode 100644
index 0000000..6b5612c
--- /dev/null
+++ b/test/CodeGen/Blackfin/i1ops.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @adj(i32 %d.1, i32 %ct.1) {
+entry:
+ %tmp.22.not = trunc i32 %ct.1 to i1 ; <i1> [#uses=1]
+ %tmp.221 = xor i1 %tmp.22.not, true ; <i1> [#uses=1]
+ %tmp.26 = or i1 false, %tmp.221 ; <i1> [#uses=1]
+ %tmp.27 = zext i1 %tmp.26 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp.27
+}
diff --git a/test/CodeGen/Blackfin/i216mem.ll b/test/CodeGen/Blackfin/i216mem.ll
new file mode 100644
index 0000000..9f8cf48
--- /dev/null
+++ b/test/CodeGen/Blackfin/i216mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i216_l = external global i216 ; <i216*> [#uses=1]
+@i216_s = external global i216 ; <i216*> [#uses=1]
+
+define void @i216_ls() nounwind {
+ %tmp = load i216* @i216_l ; <i216> [#uses=1]
+ store i216 %tmp, i216* @i216_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i248mem.ll b/test/CodeGen/Blackfin/i248mem.ll
new file mode 100644
index 0000000..db23f54
--- /dev/null
+++ b/test/CodeGen/Blackfin/i248mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin
+@i248_l = external global i248 ; <i248*> [#uses=1]
+@i248_s = external global i248 ; <i248*> [#uses=1]
+
+define void @i248_ls() nounwind {
+ %tmp = load i248* @i248_l ; <i248> [#uses=1]
+ store i248 %tmp, i248* @i248_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i256mem.ll b/test/CodeGen/Blackfin/i256mem.ll
new file mode 100644
index 0000000..bc5ade7
--- /dev/null
+++ b/test/CodeGen/Blackfin/i256mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i17_l = external global i17 ; <i17*> [#uses=1]
+@i17_s = external global i17 ; <i17*> [#uses=1]
+
+define void @i17_ls() nounwind {
+ %tmp = load i17* @i17_l ; <i17> [#uses=1]
+ store i17 %tmp, i17* @i17_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i256param.ll b/test/CodeGen/Blackfin/i256param.ll
new file mode 100644
index 0000000..df74c9a
--- /dev/null
+++ b/test/CodeGen/Blackfin/i256param.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i256_s = external global i256 ; <i256*> [#uses=1]
+
+define void @i256_ls(i256 %x) nounwind {
+ store i256 %x, i256* @i256_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i56param.ll b/test/CodeGen/Blackfin/i56param.ll
new file mode 100644
index 0000000..ca02563
--- /dev/null
+++ b/test/CodeGen/Blackfin/i56param.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i56_l = external global i56 ; <i56*> [#uses=1]
+@i56_s = external global i56 ; <i56*> [#uses=1]
+
+define void @i56_ls(i56 %x) nounwind {
+ store i56 %x, i56* @i56_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/i8mem.ll b/test/CodeGen/Blackfin/i8mem.ll
new file mode 100644
index 0000000..ea3a67e
--- /dev/null
+++ b/test/CodeGen/Blackfin/i8mem.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin
+
+@i8_l = external global i8 ; <i8*> [#uses=1]
+@i8_s = external global i8 ; <i8*> [#uses=1]
+
+define void @i8_ls() nounwind {
+ %tmp = load i8* @i8_l ; <i8> [#uses=1]
+ store i8 %tmp, i8* @i8_s
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/inline-asm.ll b/test/CodeGen/Blackfin/inline-asm.ll
new file mode 100644
index 0000000..d623f6b
--- /dev/null
+++ b/test/CodeGen/Blackfin/inline-asm.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=bfin | FileCheck %s
+
+; Standard "r"
+; CHECK: r0 = r0 + r1;
+define i32 @add_r(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "d"
+; CHECK: r0 = r0 - r1;
+define i32 @add_d(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "a" for P-regs
+; CHECK: p0 = (p0 + p1) << 1;
+define i32 @add_a(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "z" for P0, P1, P2. This is not a real regclass
+; CHECK: p0 = (p0 + p1) << 2;
+define i32 @add_Z(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
+; Target "C" for CC. This is a single register
+; CHECK: cc = p0 < p1;
+; CHECK: r0 = cc;
+define i32 @add_C(i32 %A, i32 %B) {
+ %R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind
+ ret i32 %R
+}
+
diff --git a/test/CodeGen/Blackfin/int-setcc.ll b/test/CodeGen/Blackfin/int-setcc.ll
new file mode 100644
index 0000000..6bd9f86
--- /dev/null
+++ b/test/CodeGen/Blackfin/int-setcc.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define fastcc void @Evaluate() {
+entry:
+ br i1 false, label %cond_false186, label %cond_true
+
+cond_true: ; preds = %entry
+ ret void
+
+cond_false186: ; preds = %entry
+ br i1 false, label %cond_true293, label %bb203
+
+bb203: ; preds = %cond_false186
+ ret void
+
+cond_true293: ; preds = %cond_false186
+ br i1 false, label %cond_true298, label %cond_next317
+
+cond_true298: ; preds = %cond_true293
+ br i1 false, label %cond_next518, label %cond_true397.preheader
+
+cond_next317: ; preds = %cond_true293
+ ret void
+
+cond_true397.preheader: ; preds = %cond_true298
+ ret void
+
+cond_next518: ; preds = %cond_true298
+ br i1 false, label %bb1069, label %cond_true522
+
+cond_true522: ; preds = %cond_next518
+ ret void
+
+bb1069: ; preds = %cond_next518
+ br i1 false, label %cond_next1131, label %bb1096
+
+bb1096: ; preds = %bb1069
+ ret void
+
+cond_next1131: ; preds = %bb1069
+ br i1 false, label %cond_next1207, label %cond_true1150
+
+cond_true1150: ; preds = %cond_next1131
+ ret void
+
+cond_next1207: ; preds = %cond_next1131
+ br i1 false, label %cond_next1219, label %cond_true1211
+
+cond_true1211: ; preds = %cond_next1207
+ ret void
+
+cond_next1219: ; preds = %cond_next1207
+ br i1 false, label %cond_true1223, label %cond_next1283
+
+cond_true1223: ; preds = %cond_next1219
+ br i1 false, label %cond_true1254, label %cond_true1264
+
+cond_true1254: ; preds = %cond_true1223
+ br i1 false, label %bb1567, label %cond_true1369.preheader
+
+cond_true1264: ; preds = %cond_true1223
+ ret void
+
+cond_next1283: ; preds = %cond_next1219
+ ret void
+
+cond_true1369.preheader: ; preds = %cond_true1254
+ ret void
+
+bb1567: ; preds = %cond_true1254
+ %tmp1605 = load i8* null ; <i8> [#uses=1]
+ %tmp1606 = icmp eq i8 %tmp1605, 0 ; <i1> [#uses=1]
+ br i1 %tmp1606, label %cond_next1637, label %cond_true1607
+
+cond_true1607: ; preds = %bb1567
+ ret void
+
+cond_next1637: ; preds = %bb1567
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/invalid-apint.ll b/test/CodeGen/Blackfin/invalid-apint.ll
new file mode 100644
index 0000000..a8c01ba
--- /dev/null
+++ b/test/CodeGen/Blackfin/invalid-apint.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin
+
+; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"),
+; function trunc, file APInt.cpp, line 956.
+
+@str2 = external global [29 x i8]
+
+define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
+entry:
+ %tmp17 = sext i8 %a13 to i32
+ %tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0)
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
new file mode 100644
index 0000000..5f49e9d1
--- /dev/null
+++ b/test/CodeGen/Blackfin/jumptable.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
+
+; CHECK: .section .rodata
+; CHECK: JTI1_0:
+; CHECK: .long .BB1_1
+
+define i32 @oper(i32 %op, i32 %A, i32 %B) {
+entry:
+ switch i32 %op, label %bbx [
+ i32 1 , label %bb1
+ i32 2 , label %bb2
+ i32 3 , label %bb3
+ i32 4 , label %bb4
+ i32 5 , label %bb5
+ i32 6 , label %bb6
+ i32 7 , label %bb7
+ i32 8 , label %bb8
+ i32 9 , label %bb9
+ i32 10, label %bb10
+ ]
+bb1:
+ %R1 = add i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R1
+bb2:
+ %R2 = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R2
+bb3:
+ %R3 = mul i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R3
+bb4:
+ %R4 = sdiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R4
+bb5:
+ %R5 = udiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R5
+bb6:
+ %R6 = srem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R6
+bb7:
+ %R7 = urem i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R7
+bb8:
+ %R8 = and i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R8
+bb9:
+ %R9 = or i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R9
+bb10:
+ %R10 = xor i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R10
+bbx:
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/large-switch.ll b/test/CodeGen/Blackfin/large-switch.ll
new file mode 100644
index 0000000..02d32ef
--- /dev/null
+++ b/test/CodeGen/Blackfin/large-switch.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -march=bfin
+
+; The switch expansion uses a dynamic shl, and it produces a jumptable
+
+define void @athlon_fp_unit_ready_cost() {
+entry:
+ switch i32 0, label %UnifiedReturnBlock [
+ i32 -1, label %bb2063
+ i32 19, label %bb2035
+ i32 20, label %bb2035
+ i32 21, label %bb2035
+ i32 23, label %bb2035
+ i32 24, label %bb2035
+ i32 27, label %bb2035
+ i32 32, label %bb2035
+ i32 33, label %bb1994
+ i32 35, label %bb2035
+ i32 36, label %bb1994
+ i32 90, label %bb1948
+ i32 94, label %bb1948
+ i32 95, label %bb1948
+ i32 133, label %bb1419
+ i32 135, label %bb1238
+ i32 136, label %bb1238
+ i32 137, label %bb1238
+ i32 138, label %bb1238
+ i32 139, label %bb1201
+ i32 140, label %bb1201
+ i32 141, label %bb1154
+ i32 142, label %bb1126
+ i32 144, label %bb1201
+ i32 145, label %bb1126
+ i32 146, label %bb1201
+ i32 147, label %bb1126
+ i32 148, label %bb1201
+ i32 149, label %bb1126
+ i32 150, label %bb1201
+ i32 151, label %bb1126
+ i32 152, label %bb1096
+ i32 153, label %bb1096
+ i32 154, label %bb1096
+ i32 157, label %bb1096
+ i32 158, label %bb1096
+ i32 159, label %bb1096
+ i32 162, label %bb1096
+ i32 163, label %bb1096
+ i32 164, label %bb1096
+ i32 167, label %bb1201
+ i32 168, label %bb1201
+ i32 170, label %bb1201
+ i32 171, label %bb1201
+ i32 173, label %bb1201
+ i32 174, label %bb1201
+ i32 176, label %bb1201
+ i32 177, label %bb1201
+ i32 179, label %bb993
+ i32 180, label %bb993
+ i32 181, label %bb993
+ i32 182, label %bb993
+ i32 183, label %bb993
+ i32 184, label %bb993
+ i32 365, label %bb1126
+ i32 366, label %bb1126
+ i32 367, label %bb1126
+ i32 368, label %bb1126
+ i32 369, label %bb1126
+ i32 370, label %bb1126
+ i32 371, label %bb1126
+ i32 372, label %bb1126
+ i32 373, label %bb1126
+ i32 384, label %bb1126
+ i32 385, label %bb1126
+ i32 386, label %bb1126
+ i32 387, label %bb1126
+ i32 388, label %bb1126
+ i32 389, label %bb1126
+ i32 390, label %bb1126
+ i32 391, label %bb1126
+ i32 392, label %bb1126
+ i32 525, label %bb919
+ i32 526, label %bb839
+ i32 528, label %bb919
+ i32 529, label %bb839
+ i32 532, label %cond_next6.i97
+ i32 533, label %cond_next6.i81
+ i32 534, label %bb495
+ i32 536, label %cond_next6.i81
+ i32 537, label %cond_next6.i81
+ i32 538, label %bb396
+ i32 539, label %bb288
+ i32 541, label %bb396
+ i32 542, label %bb396
+ i32 543, label %bb396
+ i32 544, label %bb396
+ i32 545, label %bb189
+ i32 546, label %cond_next6.i
+ i32 547, label %bb189
+ i32 548, label %cond_next6.i
+ i32 549, label %bb189
+ i32 550, label %cond_next6.i
+ i32 551, label %bb189
+ i32 552, label %cond_next6.i
+ i32 553, label %bb189
+ i32 554, label %cond_next6.i
+ i32 555, label %bb189
+ i32 556, label %cond_next6.i
+ i32 557, label %bb189
+ i32 558, label %cond_next6.i
+ i32 618, label %bb40
+ i32 619, label %bb18
+ i32 620, label %bb40
+ i32 621, label %bb10
+ i32 622, label %bb10
+ ]
+
+bb10:
+ ret void
+
+bb18:
+ ret void
+
+bb40:
+ ret void
+
+cond_next6.i:
+ ret void
+
+bb189:
+ ret void
+
+bb288:
+ ret void
+
+bb396:
+ ret void
+
+bb495:
+ ret void
+
+cond_next6.i81:
+ ret void
+
+cond_next6.i97:
+ ret void
+
+bb839:
+ ret void
+
+bb919:
+ ret void
+
+bb993:
+ ret void
+
+bb1096:
+ ret void
+
+bb1126:
+ ret void
+
+bb1154:
+ ret void
+
+bb1201:
+ ret void
+
+bb1238:
+ ret void
+
+bb1419:
+ ret void
+
+bb1948:
+ ret void
+
+bb1994:
+ ret void
+
+bb2035:
+ ret void
+
+bb2063:
+ ret void
+
+UnifiedReturnBlock:
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/load-i16.ll b/test/CodeGen/Blackfin/load-i16.ll
new file mode 100644
index 0000000..eb18d41
--- /dev/null
+++ b/test/CodeGen/Blackfin/load-i16.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; This somewhat contrived function heavily exercises register classes
+; It can trick -join-cross-class-copies into making illegal joins
+
+define void @f(i16** nocapture %p) nounwind readonly {
+entry:
+ %tmp1 = load i16** %p ; <i16*> [#uses=1]
+ %tmp2 = load i16* %tmp1 ; <i16> [#uses=1]
+ %ptr = getelementptr i16* %tmp1, i16 %tmp2
+ store i16 %tmp2, i16* %ptr
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/logic-i16.ll b/test/CodeGen/Blackfin/logic-i16.ll
new file mode 100644
index 0000000..e44672f
--- /dev/null
+++ b/test/CodeGen/Blackfin/logic-i16.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @and(i16 %A, i16 %B) {
+ %R = and i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @or(i16 %A, i16 %B) {
+ %R = or i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
+
+define i16 @xor(i16 %A, i16 %B) {
+ %R = xor i16 %A, %B ; <i16> [#uses=1]
+ ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
new file mode 100644
index 0000000..8c52874
--- /dev/null
+++ b/test/CodeGen/Blackfin/many-args.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+ type { i32, float, float, float, float, float, float, float, float, float, float } ; type %0
+ %struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+entry:
+ %tmp.218 = load float* null ; <float> [#uses=1]
+ %tmp.219 = getelementptr %0* null, i64 0, i32 6 ; <float*> [#uses=1]
+ %tmp.220 = load float* %tmp.219 ; <float> [#uses=1]
+ %tmp.221 = getelementptr %0* null, i64 0, i32 7 ; <float*> [#uses=1]
+ %tmp.222 = load float* %tmp.221 ; <float> [#uses=1]
+ %tmp.223 = getelementptr %0* null, i64 0, i32 8 ; <float*> [#uses=1]
+ %tmp.224 = load float* %tmp.223 ; <float> [#uses=1]
+ %tmp.225 = getelementptr %0* null, i64 0, i32 9 ; <float*> [#uses=1]
+ %tmp.226 = load float* %tmp.225 ; <float> [#uses=1]
+ %tmp.227 = getelementptr %0* null, i64 0, i32 10 ; <float*> [#uses=1]
+ %tmp.228 = load float* %tmp.227 ; <float> [#uses=1]
+ call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228)
+ ret i32 0
+}
+
+declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/Blackfin/mulhu.ll b/test/CodeGen/Blackfin/mulhu.ll
new file mode 100644
index 0000000..72bacee
--- /dev/null
+++ b/test/CodeGen/Blackfin/mulhu.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+ %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
+ %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+ %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
+ %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
+ %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
+ %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+ %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+ %struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* }
+ %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
+ %struct.def_operand_ptr = type { %struct.tree_node** }
+ %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+ %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
+ %struct.edge_def_insns = type { %struct.rtx_def* }
+ %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
+ %struct.eh_status = type opaque
+ %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+ %struct.et_node = type opaque
+ %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+ %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
+ %struct.initial_value_struct = type opaque
+ %struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 }
+ %struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 }
+ %struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* }
+ %struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 }
+ %struct.lang_decl = type opaque
+ %struct.language_function = type opaque
+ %struct.location_t = type { i8*, i32 }
+ %struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 }
+ %struct.lpt_decision = type { i32, i32 }
+ %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
+ %struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
+ %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+ %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
+ %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+ %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
+ %struct.stack_local_entry = type opaque
+ %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
+ %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
+ %struct.temp_slot = type opaque
+ %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
+ %struct.tree_ann_d = type { %struct.stmt_ann_d }
+ %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
+ %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+ %struct.tree_decl_u1 = type { i64 }
+ %struct.tree_decl_u2 = type { %struct.function* }
+ %struct.tree_node = type { %struct.tree_decl }
+ %struct.u = type { [1 x i64] }
+ %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
+ %struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
+ %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+ %struct.varasm_status = type opaque
+ %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
+ %struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 }
+ %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
+
+define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) {
+entry:
+ switch i32 0, label %bb91 [
+ i32 0, label %bb
+ i32 1, label %bb6
+ i32 3, label %cond_next135
+ ]
+
+bb: ; preds = %entry
+ ret i1 false
+
+bb6: ; preds = %entry
+ br i1 false, label %bb87, label %cond_next27
+
+cond_next27: ; preds = %bb6
+ br i1 false, label %cond_true30, label %cond_next55
+
+cond_true30: ; preds = %cond_next27
+ br i1 false, label %cond_next41, label %cond_true35
+
+cond_true35: ; preds = %cond_true30
+ ret i1 false
+
+cond_next41: ; preds = %cond_true30
+ %tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null) ; <i32> [#uses=1]
+ %tmp46 = udiv i32 %tmp44, 5 ; <i32> [#uses=1]
+ call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null)
+ br label %bb87
+
+cond_next55: ; preds = %cond_next27
+ ret i1 false
+
+bb87: ; preds = %cond_next41, %bb6
+ ret i1 false
+
+bb91: ; preds = %entry
+ ret i1 false
+
+cond_next135: ; preds = %entry
+ ret i1 false
+}
+
+declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*)
+
+declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/test/CodeGen/Blackfin/printf.ll b/test/CodeGen/Blackfin/printf.ll
new file mode 100644
index 0000000..9e54b73
--- /dev/null
+++ b/test/CodeGen/Blackfin/printf.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@.str_1 = external constant [42 x i8] ; <[42 x i8]*> [#uses=1]
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+entry:
+ %tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0)
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/printf2.ll b/test/CodeGen/Blackfin/printf2.ll
new file mode 100644
index 0000000..7ac7e80
--- /dev/null
+++ b/test/CodeGen/Blackfin/printf2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef)
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
new file mode 100644
index 0000000..c247aca
--- /dev/null
+++ b/test/CodeGen/Blackfin/promote-logic.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin > %t
+
+; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
+; operation after LegalizeOps.
+
+define void @mng_display_bgr565() {
+entry:
+ br i1 false, label %bb.preheader, label %return
+
+bb.preheader:
+ br i1 false, label %cond_true48, label %cond_next80
+
+cond_true48:
+ %tmp = load i8* null
+ %tmp51 = zext i8 %tmp to i16
+ %tmp99 = load i8* null
+ %tmp54 = bitcast i8 %tmp99 to i8
+ %tmp54.upgrd.1 = zext i8 %tmp54 to i32
+ %tmp55 = lshr i32 %tmp54.upgrd.1, 3
+ %tmp55.upgrd.2 = trunc i32 %tmp55 to i16
+ %tmp52 = shl i16 %tmp51, 5
+ %tmp56 = and i16 %tmp55.upgrd.2, 28
+ %tmp57 = or i16 %tmp56, %tmp52
+ %tmp60 = zext i16 %tmp57 to i32
+ %tmp62 = xor i32 0, 65535
+ %tmp63 = mul i32 %tmp60, %tmp62
+ %tmp65 = add i32 0, %tmp63
+ %tmp69 = add i32 0, %tmp65
+ %tmp70 = lshr i32 %tmp69, 16
+ %tmp70.upgrd.3 = trunc i32 %tmp70 to i16
+ %tmp75 = lshr i16 %tmp70.upgrd.3, 8
+ %tmp75.upgrd.4 = trunc i16 %tmp75 to i8
+ %tmp76 = lshr i8 %tmp75.upgrd.4, 5
+ store i8 %tmp76, i8* null
+ ret void
+
+cond_next80:
+ ret void
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/Blackfin/promote-setcc.ll b/test/CodeGen/Blackfin/promote-setcc.ll
new file mode 100644
index 0000000..d344fad
--- /dev/null
+++ b/test/CodeGen/Blackfin/promote-setcc.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=bfin > %t
+
+; The DAG combiner may sometimes create illegal i16 SETCC operations when run
+; after LegalizeOps. Try to tease out all the optimizations in
+; TargetLowering::SimplifySetCC.
+
+@x = external global i16
+@y = external global i16
+
+declare i16 @llvm.ctlz.i16(i16)
+
+; Case (srl (ctlz x), 5) == const
+; Note: ctlz is promoted, so this test does not catch the DAG combiner
+define i1 @srl_ctlz_const() {
+ %x = load i16* @x
+ %c = call i16 @llvm.ctlz.i16(i16 %x)
+ %s = lshr i16 %c, 4
+ %r = icmp eq i16 %s, 1
+ ret i1 %r
+}
+
+; Case (zext x) == const
+define i1 @zext_const() {
+ %x = load i16* @x
+ %r = icmp ugt i16 %x, 1
+ ret i1 %r
+}
+
+; Case (sext x) == const
+define i1 @sext_const() {
+ %x = load i16* @x
+ %y = add i16 %x, 1
+ %x2 = sext i16 %y to i32
+ %r = icmp ne i32 %x2, -1
+ ret i1 %r
+}
+
diff --git a/test/CodeGen/Blackfin/sdiv.ll b/test/CodeGen/Blackfin/sdiv.ll
new file mode 100644
index 0000000..1426655
--- /dev/null
+++ b/test/CodeGen/Blackfin/sdiv.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @sdiv(i32 %A, i32 %B) {
+ %R = sdiv i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/simple-select.ll b/test/CodeGen/Blackfin/simple-select.ll
new file mode 100644
index 0000000..0f7f270
--- /dev/null
+++ b/test/CodeGen/Blackfin/simple-select.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+declare i1 @foo()
+
+define i32 @test(i32* %A, i32* %B) {
+ %a = load i32* %A
+ %b = load i32* %B
+ %cond = call i1 @foo()
+ %c = select i1 %cond, i32 %a, i32 %b
+ ret i32 %c
+}
diff --git a/test/CodeGen/Blackfin/switch.ll b/test/CodeGen/Blackfin/switch.ll
new file mode 100644
index 0000000..3680ec6
--- /dev/null
+++ b/test/CodeGen/Blackfin/switch.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define i32 @foo(i32 %A, i32 %B, i32 %C) {
+entry:
+ switch i32 %A, label %out [
+ i32 1, label %bb
+ i32 0, label %bb13
+ ]
+
+bb: ; preds = %entry
+ ret i32 1
+
+bb13: ; preds = %entry
+ ret i32 1
+
+out: ; preds = %entry
+ ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/switch2.ll b/test/CodeGen/Blackfin/switch2.ll
new file mode 100644
index 0000000..7877bce
--- /dev/null
+++ b/test/CodeGen/Blackfin/switch2.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define i8* @FindChar(i8* %CurPtr) {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %tmp = load i8* null ; <i8> [#uses=1]
+ switch i8 %tmp, label %bb [
+ i8 0, label %bb7
+ i8 120, label %bb7
+ ]
+
+bb7: ; preds = %bb, %bb
+ ret i8* null
+}
diff --git a/test/CodeGen/Blackfin/sync-intr.ll b/test/CodeGen/Blackfin/sync-intr.ll
new file mode 100644
index 0000000..75084f0
--- /dev/null
+++ b/test/CodeGen/Blackfin/sync-intr.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
+
+define void @f() nounwind {
+entry:
+ ; CHECK: csync;
+ call void @llvm.bfin.csync()
+ ; CHECK: ssync;
+ call void @llvm.bfin.ssync()
+ ret void
+}
+
+declare void @llvm.bfin.csync() nounwind
+declare void @llvm.bfin.ssync() nounwind
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
index c69b9b0..0b06041 100644
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Make sure that global variables do not collide if they have the same name,
; but different types.
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
index 2908157..a9f54e4 100644
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This case was emitting code that looked like this:
; ...
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
index 297807e..2afb1a0 100644
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Test const pointer refs & forward references
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
index ead1bce..b71cf07 100644
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
global i32* bitcast (float* @2 to i32*) ;; Forward numeric reference
global float* @2 ;; Duplicate forward numeric reference
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
index 7ae13ec..b5a1f0b 100644
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@sptr1 = global [11 x i8]* @somestr ;; Forward ref to a constant
@somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
index 25f63a0..10b9fe2 100644
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@fptr = global void ()* @f ;; Forward ref method defn
declare void @f() ;; External method
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
index 528b8de..0827423 100644
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@array = constant [2 x i32] [ i32 12, i32 52 ] ; <[2 x i32]*> [#uses=1]
@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
index e9df0c2..3b2085c 100644
--- a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
+++ b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@MyIntList = external global { \2*, i32 }
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
index ccffe68..59aafd5 100644
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; The C Writer bombs on this testcase because it tries to print the prototype
; for the test function, which tries to print the argument name. The function
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
index bf592ce..6c4d629 100644
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Indirect function call test... found by Joel & Brian
;
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
index e048905..1187a37 100644
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This testcase fails because the C backend does not arrange to output the
; contents of a structure type before it outputs the structure type itself.
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
index ebb1c0f..021adb9 100644
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
define void @test() {
%X = alloca [4 x i32] ; <[4 x i32]*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
index 69f4575..e915cd2 100644
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare void @foo(...)
diff --git a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
index 2f6d9be..2563d8c 100644
--- a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
+++ b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%MPI_Comm = type %struct.Comm*
%struct.Comm = type opaque
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
index d40cbda..2cdd15c 100644
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ b/test/CodeGen/CBackend/2002-10-16-External.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@bob = external global i32 ; <i32*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
index a17b8db..54e0aa6 100644
--- a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
+++ b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%BitField = type i32
%tokenptr = type i32*
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
index 2dd281a..82d594f 100644
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00" ; <[18 x i8]*> [#uses=1]
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
index 4a7170d..92d582d 100644
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Apparently this constant was unsigned in ISO C 90, but not in C 99.
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
index 2a4e839..a42dc27 100644
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This testcase breaks the C backend, because gcc doesn't like (...) functions
; with no arguments at all.
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
index fb7e2ba..19c7840 100644
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; The C backend was dying when there was no typename for a struct type!
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
index 6b7f9f0..048e045 100644
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%X = type { i32, float }
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
index c6128d6..6197b30 100644
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; Make sure a hex constant does not continue into a valid hexadecimal letter/number
@version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
index fd68211..f6177ea 100644
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
index 9fe98e2..f0b1bbc 100644
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare i32 @callee(i32, i32)
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
index ef3b579..4bd1da2 100644
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c | grep common | grep X
+; RUN: llc < %s -march=c | grep common | grep X
@X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
index 077f16c..0fbb3fe 100644
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; This is a non-normal FP value: it's a nan.
@NAN = global { float } { float 0x7FF8000000000000 } ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
index e67ba2e..9195634 100644
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
index 41f3f1e..b4389ff 100644
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; reduced from DOOM.
%union._XEvent = type { i32 }
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
index e8da787..6a26291 100644
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@y = weak global i8 0 ; <i8*> [#uses=1]
define i32 @testcaseshr() {
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
index 911d6d4..142fbd8 100644
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep builtin_return_address
+; RUN: llc < %s -march=c | grep builtin_return_address
declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
index 1629deb..d1c6861 100644
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
@@ -4,7 +4,7 @@
; this testcase for example, which caused the CBE to mangle one, screwing
; everything up. :( Test that this does not happen anymore.
;
-; RUN: llvm-as < %s | llc -march=c | not grep _memcpy
+; RUN: llc < %s -march=c | not grep _memcpy
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
index 6f23915..6fceb08 100644
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
@@ -1,5 +1,5 @@
; This is a non-normal FP value
-; RUN: llvm-as < %s | llc -march=c | grep FPConstant | grep static
+; RUN: llc < %s -march=c | grep FPConstant | grep static
define float @func() {
ret float 0xFFF0000000000000
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
index 2d62231..cf59634 100644
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep func1 | grep WEAK
+; RUN: llc < %s -march=c | grep func1 | grep WEAK
define linkonce i32 @func1() {
ret i32 5
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
index ae7ba53..3ee23d1 100644
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
index a8ee438..af8f441 100644
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
@@ -1,7 +1,7 @@
; The CBE should not emit code that casts the function pointer. This causes
; GCC to get testy and insert trap instructions instead of doing the right
; thing. :(
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare void @external(i8*)
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
index 8acab76..78e9bac 100644
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | not grep extern.*msg
+; RUN: llc < %s -march=c | not grep extern.*msg
; PR472
@msg = internal global [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
index 9acaa72..57a9adc 100644
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
define i32 @foo() {
ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
index 12c8790..dd505af 100644
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep volatile
+; RUN: llc < %s -march=c | grep volatile
define void @test(i32* %P) {
%X = volatile load i32* %P ; <i32> [#uses=1]
diff --git a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
index 162e3d3..1c5f506 100644
--- a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
+++ b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
%JNIEnv = type %struct.JNINa*
%struct.JNINa = type { i8*, i8*, i8*, void (%JNIEnv*)* }
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
index 55d43e2..808b8f9 100644
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | not grep -- --65535
+; RUN: llc < %s -march=c | not grep -- --65535
; PR596
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
index 7c55019..6e650eb 100644
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep fmod
+; RUN: llc < %s -march=c | grep fmod
define double @test(double %A, double %B) {
%C = frem double %A, %B ; <double> [#uses=1]
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
index 37f311d..99de837 100644
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {\\* *volatile *\\*}
+; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
@G = external global void ()* ; <void ()**> [#uses=2]
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
index f8393a3..c9df800 100644
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | \
+; RUN: llc < %s -march=c | \
; RUN: grep __BITCAST | count 14
define i32 @test1(float %F) {
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
index 63dd9da..da36e78 100644
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
@@ -1,5 +1,5 @@
; For PR1099
-; RUN: llvm-as < %s | llc -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
+; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
index 42fa0d8..8a5f253 100644
--- a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
+++ b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
@@ -1,5 +1,5 @@
; PR918
-; RUN: llvm-as < %s | llc -march=c | not grep {l_structtype_s l_fixarray_array3}
+; RUN: llc < %s -march=c | not grep {l_structtype_s l_fixarray_array3}
%structtype_s = type { i32 }
%fixarray_array3 = type [3 x %structtype_s]
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
index 8fe06b7..4f699b7 100644
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=c | grep __builtin_stack_save
-; RUN: llvm-as < %s | llc -march=c | grep __builtin_stack_restore
+; RUN: llc < %s -march=c | grep __builtin_stack_save
+; RUN: llc < %s -march=c | grep __builtin_stack_restore
; PR1028
declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
index f253b30..7d508e4 100644
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ b/test/CodeGen/CBackend/2007-02-05-memset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; PR1181
target datalayout = "e-p:64:64"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
index eb5cb86..7e1ff2a 100644
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
@@ -1,7 +1,7 @@
; PR1164
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llvm-as < %s | llc -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
+; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
+; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
+; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
@G = global i32 123
@ltmp_0_1 = global i32 123
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
index 6057616..c8bfdd6 100644
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {packed}
+; RUN: llc < %s -march=c | grep {packed}
%struct.p = type <{ i16 }>
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
index 269126d..6e0cf68 100644
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | \
+; RUN: llc < %s -march=c | \
; RUN: grep {struct __attribute__ ((packed, aligned(} | count 4
define void @test(i32* %P) {
diff --git a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
index 16bf23e..8db3167 100644
--- a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
+++ b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
declare {i32, i32} @foo()
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
index 52e025900..e9fa552 100644
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_t.*&1}
+; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
define i32 @test(i32 %r) {
%s = icmp eq i32 %r, 0
%t = add i1 %s, %s
diff --git a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
index a2c1046..054a3ca 100644
--- a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
+++ b/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {"m"(llvm_cbe_newcw))}
+; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
; PR2407
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
index 32d635a..b72b573 100644
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
; PR2907
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9.5"
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
index 68849b2..7dec3d9 100644
--- a/test/CodeGen/CBackend/fneg.ll
+++ b/test/CodeGen/CBackend/fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
define void @func() nounwind {
entry:
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
index a16f91b..bf8477b 100644
--- a/test/CodeGen/CBackend/pr2408.ll
+++ b/test/CodeGen/CBackend/pr2408.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {\\* ((unsigned int )}
+; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
; PR2408
define i32 @a(i32 %a) {
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
index d01e992..b7b7677 100644
--- a/test/CodeGen/CBackend/vectors.ll
+++ b/test/CodeGen/CBackend/vectors.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
@.str15 = external global [2 x i8]
define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll
index 75b96e6..71fea12 100644
--- a/test/CodeGen/CPP/2007-06-16-Funcname.ll
+++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppfname=WAKKA | not grep makeLLVMModule
+; RUN: llc < %s -march=cpp -cppfname=WAKKA | not grep makeLLVMModule
; PR1515
define void @foo() {
diff --git a/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
index 9d132ec..0b2d882 100644
--- a/test/CodeGen/CPP/2009-05-01-Long-Double.ll
+++ b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -f -o %t
+; RUN: llc < %s -march=cpp -cppgen=program -o %t
define x86_fp80 @some_func() nounwind {
entry:
diff --git a/test/CodeGen/CPP/2009-05-04-CondBr.ll b/test/CodeGen/CPP/2009-05-04-CondBr.ll
index 6c3f984..feb2cf7 100644
--- a/test/CodeGen/CPP/2009-05-04-CondBr.ll
+++ b/test/CodeGen/CPP/2009-05-04-CondBr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -f -o %t
+; RUN: llc < %s -march=cpp -cppgen=program -o %t
; RUN: grep "BranchInst::Create(label_if_then, label_if_end, int1_cmp, label_entry);" %t
define i32 @some_func(i32 %a) nounwind {
diff --git a/test/CodeGen/CPP/llvm2cpp.ll b/test/CodeGen/CPP/llvm2cpp.ll
index 651a65b..447f332 100644
--- a/test/CodeGen/CPP/llvm2cpp.ll
+++ b/test/CodeGen/CPP/llvm2cpp.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s | llvm-dis > /dev/null
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -o -
+; RUN: llc < %s -march=cpp -cppgen=program -o -
@X = global i32 4, align 16 ; <i32*> [#uses=0]
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
index 75e0ed0..3542231 100644
--- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
+++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cellspu -o - | grep brz
+; RUN: llc < %s -march=cellspu -o - | grep brz
; PR3274
target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index a18b6f8..139e97b 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep and %t1.s | count 234
; RUN: grep andc %t1.s | count 85
; RUN: grep andi %t1.s | count 37
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index a305a23..960d2fe 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep brsl %t1.s | count 1
; RUN: grep brasl %t1.s | count 1
; RUN: grep stqd %t1.s | count 80
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index 9be714e..639c794 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep bisl %t1.s | count 7
; RUN: grep ila %t1.s | count 1
; RUN: grep rotqby %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
index 3c7ee7a..e1a6cd8 100644
--- a/test/CodeGen/CellSPU/ctpop.ll
+++ b/test/CodeGen/CellSPU/ctpop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep cntb %t1.s | count 3
; RUN: grep andi %t1.s | count 3
; RUN: grep rotmi %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index d4802ae..b0a372b 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep dfa %t1.s | count 2
; RUN: grep dfs %t1.s | count 2
; RUN: grep dfm %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
index 5406956..22c8c3b 100644
--- a/test/CodeGen/CellSPU/eqv.ll
+++ b/test/CodeGen/CellSPU/eqv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep eqv %t1.s | count 18
; RUN: grep xshw %t1.s | count 6
; RUN: grep xsbh %t1.s | count 3
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
index bcd2f42..0ac971c 100644
--- a/test/CodeGen/CellSPU/extract_elt.ll
+++ b/test/CodeGen/CellSPU/extract_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep shufb %t1.s | count 39
; RUN: grep ilhu %t1.s | count 27
; RUN: grep iohl %t1.s | count 27
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index 27a659e..f07fe6f 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep fceq %t1.s | count 1
; RUN: grep fcmeq %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll
index 1906bfe..2b61fa6 100644
--- a/test/CodeGen/CellSPU/fcmp64.ll
+++ b/test/CodeGen/CellSPU/fcmp64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
entry:
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
index d121c3f..9921626 100644
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ b/test/CodeGen/CellSPU/fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep frest %t1.s | count 2
; RUN: grep -w fi %t1.s | count 2
; RUN: grep -w fm %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 5bd66f4..1e5e3b3 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep 32768 %t1.s | count 2
; RUN: grep xor %t1.s | count 4
; RUN: grep and %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index dd67827..3553cbb 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep xswd %t1.s | count 3
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll
index 23a036e..57a2aa8 100644
--- a/test/CodeGen/CellSPU/i8ops.ll
+++ b/test/CodeGen/CellSPU/i8ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; ModuleID = 'i8ops.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
index 56d1b8f..32b1261 100644
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ b/test/CodeGen/CellSPU/icmp16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ilh %t1.s | count 15
; RUN: grep ceqh %t1.s | count 29
; RUN: grep ceqhi %t1.s | count 13
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
index 4f74b0d..ccbb5f7 100644
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ b/test/CodeGen/CellSPU/icmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ila %t1.s | count 6
; RUN: grep ceq %t1.s | count 28
; RUN: grep ceqi %t1.s | count 12
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
index b26252c..9dd2cdc 100644
--- a/test/CodeGen/CellSPU/icmp64.ll
+++ b/test/CodeGen/CellSPU/icmp64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ceq %t1.s | count 20
; RUN: grep cgti %t1.s | count 12
; RUN: grep cgt %t1.s | count 16
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
index d246481..5517d10 100644
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ b/test/CodeGen/CellSPU/icmp8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ceqb %t1.s | count 24
; RUN: grep ceqbi %t1.s | count 12
; RUN: grep clgtb %t1.s | count 11
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
index 9a461cb..077d071 100644
--- a/test/CodeGen/CellSPU/immed16.ll
+++ b/test/CodeGen/CellSPU/immed16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep "ilh" %t1.s | count 11
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
index bf471b1..119f526 100644
--- a/test/CodeGen/CellSPU/immed32.ll
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ilhu %t1.s | count 8
; RUN: grep iohl %t1.s | count 6
; RUN: grep -w il %t1.s | count 3
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
index bbda3ff..fd48365 100644
--- a/test/CodeGen/CellSPU/immed64.ll
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep lqa %t1.s | count 13
; RUN: grep ilhu %t1.s | count 15
; RUN: grep ila %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
index ee30765..984c017 100644
--- a/test/CodeGen/CellSPU/int2fp.ll
+++ b/test/CodeGen/CellSPU/int2fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep csflt %t1.s | count 5
; RUN: grep cuflt %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
index 87ad182..b0f6a62 100644
--- a/test/CodeGen/CellSPU/intrinsics_branch.ll
+++ b/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep ceq %t1.s | count 30
; RUN: grep ceqb %t1.s | count 10
; RUN: grep ceqhi %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
index c18f8de..8137347 100644
--- a/test/CodeGen/CellSPU/intrinsics_float.ll
+++ b/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep fa %t1.s | count 5
; RUN: grep fs %t1.s | count 5
; RUN: grep fm %t1.s | count 15
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
index 843340b..a29ee4c 100644
--- a/test/CodeGen/CellSPU/intrinsics_logical.ll
+++ b/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep and %t1.s | count 20
; RUN: grep andc %t1.s | count 5
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index 3b9746c..8e5422c 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep {lqd.*0(\$3)} %t1.s | count 1
-; RUN: grep {lqd.*16(\$3)} %t1.s | count 1
+; RUN: llc < %s -march=cellspu | FileCheck %s
; ModuleID = 'loads.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -10,11 +8,13 @@ define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
entry:
%tmp1 = load <4 x float>* %a
ret <4 x float> %tmp1
+; CHECK: lqd $3, 0($3)
}
define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
- %tmp1 = load <4 x float>* %arrayidx ; <<4 x float>> [#uses=1]
+ %arrayidx = getelementptr <4 x float>* %a, i32 1
+ %tmp1 = load <4 x float>* %arrayidx
ret <4 x float> %tmp1
+; CHECK: lqd $3, 16($3)
}
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
index 755b99b..d15da12 100644
--- a/test/CodeGen/CellSPU/mul-with-overflow.ll
+++ b/test/CodeGen/CellSPU/mul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cellspu
+; RUN: llc < %s -march=cellspu
declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
define i1 @a(i16 %x) zeroext nounwind {
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
index 085ce55..031d6c3 100644
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep mpy %t1.s | count 44
; RUN: grep mpyu %t1.s | count 4
; RUN: grep mpyh %t1.s | count 10
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index 841a3ec..e141923 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep nand %t1.s | count 90
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 4e9da8f..8aa1e99 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep and %t1.s | count 2
; RUN: grep orc %t1.s | count 85
; RUN: grep ori %t1.s | count 30
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
index 91567ce..7452276 100644
--- a/test/CodeGen/CellSPU/private.ll
+++ b/test/CodeGen/CellSPU/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=cellspu > %t
+; RUN: llc < %s -march=cellspu > %t
; RUN: grep .Lfoo: %t
; RUN: grep brsl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index e308172..a504c00 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s
+; RUN: llc < %s -march=cellspu -o %t1.s
; RUN: grep rot %t1.s | count 85
; RUN: grep roth %t1.s | count 8
; RUN: grep roti.*5 %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index e83e476..c804256 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep selb %t1.s | count 56
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
new file mode 100644
index 0000000..0c0b359
--- /dev/null
+++ b/test/CodeGen/CellSPU/sext128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=cellspu | FileCheck %s
+
+; ModuleID = 'sext128.bc'
+target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
+target triple = "spu"
+
+define i128 @sext_i64_i128(i64 %a) {
+entry:
+ %0 = sext i64 %a to i128
+ ret i128 %0
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 66051
+; CHECK: long 67438087
+; CHECK: rotmai
+; CHECK: lqa
+; CHECK: shufb
+}
+
+define i128 @sext_i32_i128(i32 %a) {
+entry:
+ %0 = sext i32 %a to i128
+ ret i128 %0
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 66051
+; CHECK: rotmai
+; CHECK: lqa
+; CHECK: shufb
+}
+
+define i128 @sext_i32_i128a(float %a) {
+entry:
+ %0 = call i32 @myfunc(float %a)
+ %1 = sext i32 %0 to i128
+ ret i128 %1
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 269488144
+; CHECK: long 66051
+; CHECK: rotmai
+; CHECK: lqa
+; CHECK: shufb
+}
+
+declare i32 @myfunc(float)
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 3c26baa..0264fc8 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep {shlh } %t1.s | count 9
; RUN: grep {shlhi } %t1.s | count 3
; RUN: grep {shl } %t1.s | count 9
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
index d77dd92..80bf47c 100644
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu -enable-unsafe-fp-math > %t1.s
+; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s
; RUN: grep fa %t1.s | count 2
; RUN: grep fs %t1.s | count 2
; RUN: grep fm %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index f2f35ef..05f44f4 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep {stqd.*0(\$3)} %t1.s | count 4
; RUN: grep {stqd.*16(\$3)} %t1.s | count 4
; RUN: grep 16256 %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index 82d319d..8ee7d93 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep lqa %t1.s | count 5
; RUN: grep lqd %t1.s | count 11
; RUN: grep rotqbyi %t1.s | count 7
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
index db22564..d161852 100644
--- a/test/CodeGen/CellSPU/trunc.ll
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep shufb %t1.s | count 19
; RUN: grep {ilhu.*1799} %t1.s | count 1
; RUN: grep {ilhu.*771} %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll
index 4b29adc..24c05c6 100644
--- a/test/CodeGen/CellSPU/vec_const.ll
+++ b/test/CodeGen/CellSPU/vec_const.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep -w il %t1.s | count 3
; RUN: grep ilhu %t1.s | count 8
; RUN: grep -w ilh %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
index 6abbd9a..9a00c1f 100644
--- a/test/CodeGen/CellSPU/vecinsert.ll
+++ b/test/CodeGen/CellSPU/vecinsert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep cbd %t1.s | count 5
; RUN: grep chd %t1.s | count 5
; RUN: grep cwd %t1.s | count 10
diff --git a/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll b/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
index f6d95cb..dd382cf 100644
--- a/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
+++ b/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc
+; RUN: llc < %s
; This caused the backend to assert out with:
; SparcInstrInfo.cpp:103: failed assertion `0 && "Unexpected unsigned type"'
diff --git a/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll b/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
index d77b9e1..751ed40 100644
--- a/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
+++ b/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Compiling this file produces:
; Sparc.cpp:91: failed assertion `(offset - OFFSET) % getStackFrameSizeAlignment() == 0'
diff --git a/test/CodeGen/Generic/2003-05-27-phifcmpd.ll b/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
index cf17ef4..6fb1799 100644
--- a/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
+++ b/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(i32 %p.1, double %tmp.212) {
entry:
diff --git a/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll b/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
index 03b2a16..14bb000 100644
--- a/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
+++ b/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(double %tmp.212) {
%tmp.213 = fcmp une double %tmp.212, 0.000000e+00 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll b/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
index b456eeb..cc0eb5c 100644
--- a/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
+++ b/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(double %tmp.212) {
entry:
diff --git a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
index 595700a..c6fbdae 100644
--- a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
+++ b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/External/SPEC/CINT2000/175.vpr.llvm.bc
diff --git a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
index 41c90bd..10d3a11 100644
--- a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/External/SPEC/CINT2000/254.gap.llvm.bc
diff --git a/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll b/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
index 43bff82..f7c3e42 100644
--- a/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/SingleSource/richards_benchmark.c
diff --git a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
index d66ea18..1d1aad5 100644
--- a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
+++ b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: May 28, 2003.
;; From: test/Programs/MultiSource/Olden-perimeter/maketree.c
diff --git a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
index 80738d5..64312ba 100644
--- a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
+++ b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [42 x i8] c" ui = %u (0x%x)\09\09UL-ui = %lld (0x%llx)\0A\00" ; <[42 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
index 4532b76..8019caa 100644
--- a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
+++ b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: Jul 8, 2003.
;; From: test/Programs/MultiSource/Olden-perimeter
diff --git a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
index 54880db..4e6fe1c 100644
--- a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
+++ b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Date: Jul 29, 2003.
;; From: test/Programs/MultiSource/Ptrdist-bc
diff --git a/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll b/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
index 10d4069..393062a 100644
--- a/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
+++ b/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-correct-eh-support
+; RUN: llc < %s -enable-correct-eh-support
define i32 @test() {
unwind
diff --git a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
index 1f58ce1..d4a4cf8 100644
--- a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
+++ b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@global_long_1 = linkonce global i64 7 ; <i64*> [#uses=1]
@global_long_2 = linkonce global i64 49 ; <i64*> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll b/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
index ed8b2a2..7fd2361 100644
--- a/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
+++ b/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @intersect_pixel() {
entry:
diff --git a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
index 37aaa32..353e411 100644
--- a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
+++ b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 }
@.str_67 = external global [4 x i8] ; <[4 x i8]*> [#uses=1]
@.str_87 = external global [17 x i8] ; <[17 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
index ab3a31d..733202c 100644
--- a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
+++ b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that llvm.memcpy works with a i64 length operand on all targets.
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll b/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
index b2bea1c..08060bf 100644
--- a/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
+++ b/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @test() {
%X = alloca { } ; <{ }*> [#uses=0]
diff --git a/test/CodeGen/Generic/2005-10-21-longlonggtu.ll b/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
index b355b02..53a9cd0 100644
--- a/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
+++ b/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define float @t(i64 %u_arg) {
%u = bitcast i64 %u_arg to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-12-01-Crash.ll b/test/CodeGen/Generic/2005-12-01-Crash.ll
index ee72ee1..a9eedde 100644
--- a/test/CodeGen/Generic/2005-12-01-Crash.ll
+++ b/test/CodeGen/Generic/2005-12-01-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@str = external global [36 x i8] ; <[36 x i8]*> [#uses=0]
@str.upgrd.1 = external global [29 x i8] ; <[29 x i8]*> [#uses=0]
@str1 = external global [29 x i8] ; <[29 x i8]*> [#uses=0]
diff --git a/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll b/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
index bd2e043..349540f 100644
--- a/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
+++ b/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i64 @test(i64 %A) {
%B = trunc i64 %A to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll b/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
index 1a555b3..42e8ed0 100644
--- a/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
+++ b/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; ModuleID = '2006-01-12-BadSetCCFold.ll'
%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
diff --git a/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll b/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
index b1e08c7..f06d341 100644
--- a/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
+++ b/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This crashed the PPC backend.
define void @test() {
diff --git a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
index bacf8b5..5508272 100644
--- a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
+++ b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@G = external global i32 ; <i32*> [#uses=1]
define void @encode_one_frame(i64 %tmp.2i) {
diff --git a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
index 9607ebe..2a6cc0c 100644
--- a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
+++ b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Infinite loop in the dag combiner, reduced from 176.gcc.
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
%struct.anon = type { i32 }
diff --git a/test/CodeGen/Generic/2006-04-11-vecload.ll b/test/CodeGen/Generic/2006-04-11-vecload.ll
index cc96d8f..a68ed83 100644
--- a/test/CodeGen/Generic/2006-04-11-vecload.ll
+++ b/test/CodeGen/Generic/2006-04-11-vecload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
; The vload was getting memoized to the previous scalar load!
diff --git a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
index b99aa98..8465b82 100644
--- a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
+++ b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR748
@G = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll b/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
index 6b9bf11..22d8f99 100644
--- a/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
+++ b/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @test(i32 %tmp93) {
%tmp98 = shl i32 %tmp93, 31 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
index 59ed295..1a9fa9f 100644
--- a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
+++ b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.SYMBOL_TABLE_ENTRY = type { [9 x i8], [9 x i8], i32, i32, i32, %struct.SYMBOL_TABLE_ENTRY* }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll b/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
index b644bd2..a3720a9 100644
--- a/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
+++ b/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
define float @test(i32 %tmp12771278) {
switch i32 %tmp12771278, label %bb1279 [
diff --git a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
index 1aa3c62..bd922b3 100644
--- a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
+++ b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
%struct.cl_perfunc_opts = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32 }
@cl_pf_opts = external global %struct.cl_perfunc_opts ; <%struct.cl_perfunc_opts*> [#uses=2]
diff --git a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
index 8e8f186..c4f2fb0 100644
--- a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
+++ b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.rtunion = type { i64 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
@ix86_cpu = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-07-03-schedulers.ll b/test/CodeGen/Generic/2006-07-03-schedulers.ll
index 597ee56..756bd5d 100644
--- a/test/CodeGen/Generic/2006-07-03-schedulers.ll
+++ b/test/CodeGen/Generic/2006-07-03-schedulers.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -pre-RA-sched=default
-; RUN: llvm-as < %s | llc -pre-RA-sched=list-burr
-; RUN: llvm-as < %s | llc -pre-RA-sched=fast
+; RUN: llc < %s -pre-RA-sched=default
+; RUN: llc < %s -pre-RA-sched=list-burr
+; RUN: llc < %s -pre-RA-sched=fast
; PR859
; The top-down schedulers are excluded here because they don't yet support
diff --git a/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll b/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
index 7f8af5d..cbe8b15 100644
--- a/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
+++ b/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index c6d0dfe..4b332b3 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -regalloc=local
+; RUN: llc < %s -regalloc=local
%struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=2]
diff --git a/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll b/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
index 2134d33..3d592b3 100644
--- a/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
+++ b/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo() {
br label %cond_true813.i
diff --git a/test/CodeGen/Generic/2006-10-27-CondFolding.ll b/test/CodeGen/Generic/2006-10-27-CondFolding.ll
index b3cfb99..51902c8 100644
--- a/test/CodeGen/Generic/2006-10-27-CondFolding.ll
+++ b/test/CodeGen/Generic/2006-10-27-CondFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @start_pass_huff(i32 %gather_statistics) {
entry:
diff --git a/test/CodeGen/Generic/2006-10-29-Crash.ll b/test/CodeGen/Generic/2006-10-29-Crash.ll
index cabec54..7dcb52c 100644
--- a/test/CodeGen/Generic/2006-10-29-Crash.ll
+++ b/test/CodeGen/Generic/2006-10-29-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @form_component_prediction(i32 %dy) {
entry:
diff --git a/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll b/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
index a773759..ad3e49f 100644
--- a/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
+++ b/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep adc
+; RUN: llc < %s -march=x86 | not grep adc
; PR987
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
index 95ef53c..26d0f4f 100644
--- a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
+++ b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1011
%struct.mng_data = type { i8* (%struct.mng_data*, i32)*, i32, i32, i32, i8, i8, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll b/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
index 91ac3b9..50a244b 100644
--- a/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
+++ b/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR1049
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
index 49203d9..255b120 100644
--- a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
+++ b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1114
declare i1 @foo()
diff --git a/test/CodeGen/Generic/2007-02-16-BranchFold.ll b/test/CodeGen/Generic/2007-02-16-BranchFold.ll
index 0a8e49e..6bf5631 100644
--- a/test/CodeGen/Generic/2007-02-16-BranchFold.ll
+++ b/test/CodeGen/Generic/2007-02-16-BranchFold.ll
@@ -1,5 +1,5 @@
; PR 1200
-; RUN: llvm-as < %s | llc -enable-tail-merge=0 | not grep jmp
+; RUN: llc < %s -enable-tail-merge=0 | not grep jmp
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll b/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
index 8b7db47..a8f0e57 100644
--- a/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
+++ b/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
@@ -1,5 +1,5 @@
; PR1219
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl \$1, %eax}
+; RUN: llc < %s -march=x86 | grep {movl \$1, %eax}
define i32 @test(i1 %X) {
old_entry1:
diff --git a/test/CodeGen/Generic/2007-02-25-invoke.ll b/test/CodeGen/Generic/2007-02-25-invoke.ll
index 6dba99e..6e20eaa 100644
--- a/test/CodeGen/Generic/2007-02-25-invoke.ll
+++ b/test/CodeGen/Generic/2007-02-25-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1224
diff --git a/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll b/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
index 9cbf314..339f0f7 100644
--- a/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
+++ b/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; XFAIL: sparc-sun-solaris2
; PR1308
; PR1557
diff --git a/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll b/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
index 1418bbf..a0b1403c 100644
--- a/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
+++ b/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
; PR 1323
; ModuleID = 'test.bc'
diff --git a/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll b/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
index 5490687..00337b9 100644
--- a/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
+++ b/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 8388635
-; RUN: llvm-as < %s | llc -march=x86-64 | grep 4294981120
+; RUN: llc < %s -march=x86 | grep 8388635
+; RUN: llc < %s -march=x86-64 | grep 4294981120
; PR 1325
; ModuleID = 'bugpoint.test.bc'
diff --git a/test/CodeGen/Generic/2007-04-17-lsr-crash.ll b/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
index 4257e9f..98f87e5 100644
--- a/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
+++ b/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo(i32 %inTextSize) {
entry:
diff --git a/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll b/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
index 16d7a16..3e8857f 100644
--- a/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
+++ b/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep je | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 | grep 4297064449
+; RUN: llc < %s -march=x86 | grep je | count 3
+; RUN: llc < %s -march=x86-64 | grep 4297064449
; PR 1325+
define i32 @foo(i8 %bar) {
diff --git a/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll b/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
index 0ea13a2..af522dc 100644
--- a/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
+++ b/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that we can have an "X" output constraint.
diff --git a/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll b/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
index 9424ea7..f2c9b7f 100644
--- a/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
+++ b/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct..0anon = type { [100 x i32] }
diff --git a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
index 71b4c85..568b88f 100644
--- a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
+++ b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1228
"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
diff --git a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
index 8a42790..533aa4a 100644
--- a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
+++ b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh -march=x86
+; RUN: llc < %s -enable-eh -march=x86
%struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
@program_error = external global %struct.exception ; <%struct.exception*> [#uses=1]
diff --git a/test/CodeGen/Generic/2007-05-05-Personality.ll b/test/CodeGen/Generic/2007-05-05-Personality.ll
index 0fa0e2f..2749326 100644
--- a/test/CodeGen/Generic/2007-05-05-Personality.ll
+++ b/test/CodeGen/Generic/2007-05-05-Personality.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR
@error = external global i8 ; <i8*> [#uses=2]
diff --git a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
index a61108a..b989819 100644
--- a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
+++ b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
diff --git a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
index 0b98ebe..33a3645 100644
--- a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
+++ b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-eh -asm-verbose -o - | \
+; RUN: llc < %s -march=x86 -enable-eh -asm-verbose -o - | \
; RUN: grep -A 3 {Llabel138.*Region start} | grep {3.*Action}
; PR1422
; PR1508
diff --git a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
index cedee6f..e220be6 100644
--- a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
+++ b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o -
+; RUN: llc < %s -o -
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
index 98871d0..bd26481 100644
--- a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
+++ b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 41fdb71..fc9164f 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
; PR1833
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
index 4b25444..314bb05 100644
--- a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
+++ b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://5707064
define i32 @f(i16* %pc) {
diff --git a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
index 8ed4139..70c3aaa 100644
--- a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
+++ b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@letters.3100 = external constant [63 x i8] ; <[63 x i8]*> [#uses=2]
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 4639b6f..288bfd2 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str = internal constant [14 x i8] c"%lld %d %d %d\00"
diff --git a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
index 9acb852..8bf82df 100644
--- a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
+++ b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() nounwind {
entry:
diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
index ef60f92..da1aeb5 100644
--- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
+++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1133
define void @test(i32* %X) nounwind {
entry:
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index 0169307..97db667 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://5763967
define void @test() {
diff --git a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
index b2112f3..10b3d44 100644
--- a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
+++ b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2096
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
diff --git a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
index a60d101..4f95dfe 100644
--- a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
+++ b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2603
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
index c584402..6281ada 100644
--- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
+++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3806
%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
diff --git a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
index 40ad3de..9a9c1a1 100644
--- a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
+++ b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -soft-float
+; RUN: llc < %s -soft-float
; PR3899
@m = external global <2 x double>;
diff --git a/test/CodeGen/Generic/2009-04-10-SinkCrash.ll b/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
index 3637a06..125f875 100644
--- a/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
+++ b/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @QRiterate(i32 %p.1, double %tmp.212) nounwind {
entry:
diff --git a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
index 405a6a8..577b547 100644
--- a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
+++ b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6836460
define i32 @test(i128* %P) nounwind {
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
index 59e7d0c..112cac4 100644
--- a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
+++ b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4317
declare i32 @b()
diff --git a/test/CodeGen/Generic/APIntLoadStore.ll b/test/CodeGen/Generic/APIntLoadStore.ll
index 57ddae2..7c71a33 100644
--- a/test/CodeGen/Generic/APIntLoadStore.ll
+++ b/test/CodeGen/Generic/APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_l = external global i1 ; <i1*> [#uses=1]
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_l = external global i2 ; <i2*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntParam.ll b/test/CodeGen/Generic/APIntParam.ll
index f80f71b..8aa0b49 100644
--- a/test/CodeGen/Generic/APIntParam.ll
+++ b/test/CodeGen/Generic/APIntParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_s = external global i2 ; <i2*> [#uses=1]
@i3_s = external global i3 ; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntSextParam.ll b/test/CodeGen/Generic/APIntSextParam.ll
index 9fb06cb..acc0eeb 100644
--- a/test/CodeGen/Generic/APIntSextParam.ll
+++ b/test/CodeGen/Generic/APIntSextParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_s = external global i2 ; <i2*> [#uses=1]
@i3_s = external global i3 ; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntZextParam.ll b/test/CodeGen/Generic/APIntZextParam.ll
index ea7743e..173b9fd 100644
--- a/test/CodeGen/Generic/APIntZextParam.ll
+++ b/test/CodeGen/Generic/APIntZextParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
@i1_s = external global i1 ; <i1*> [#uses=1]
@i2_s = external global i2 ; <i2*> [#uses=1]
@i3_s = external global i3 ; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/BasicInstrs.ll b/test/CodeGen/Generic/BasicInstrs.ll
index e65cbf7..578431e 100644
--- a/test/CodeGen/Generic/BasicInstrs.ll
+++ b/test/CodeGen/Generic/BasicInstrs.ll
@@ -1,7 +1,7 @@
; New testcase, this contains a bunch of simple instructions that should be
; handled by a code generator.
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @add(i32 %A, i32 %B) {
%R = add i32 %A, %B ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/BurgBadRegAlloc.ll b/test/CodeGen/Generic/BurgBadRegAlloc.ll
index 3ccc9a0..99d856a 100644
--- a/test/CodeGen/Generic/BurgBadRegAlloc.ll
+++ b/test/CodeGen/Generic/BurgBadRegAlloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; Register allocation is doing a very poor job on this routine from yyparse
;; in Burg:
diff --git a/test/CodeGen/Generic/ConstantExprLowering.ll b/test/CodeGen/Generic/ConstantExprLowering.ll
index d265415..428d712 100644
--- a/test/CodeGen/Generic/ConstantExprLowering.ll
+++ b/test/CodeGen/Generic/ConstantExprLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [16 x i8] c"%d %d %d %d %d\0A\00" ; <[16 x i8]*> [#uses=1]
@XA = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/GC/alloc_loop.ll b/test/CodeGen/Generic/GC/alloc_loop.ll
index b1fee68..fb78ba2 100644
--- a/test/CodeGen/Generic/GC/alloc_loop.ll
+++ b/test/CodeGen/Generic/GC/alloc_loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare i8* @llvm_gc_allocate(i32)
diff --git a/test/CodeGen/Generic/GC/argpromotion.ll b/test/CodeGen/Generic/GC/argpromotion.ll
index 5df947a..dda376d 100644
--- a/test/CodeGen/Generic/GC/argpromotion.ll
+++ b/test/CodeGen/Generic/GC/argpromotion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -argpromotion
+; RUN: opt < %s -anders-aa -argpromotion
declare void @llvm.gcroot(i8**, i8*)
diff --git a/test/CodeGen/Generic/GC/deadargelim.ll b/test/CodeGen/Generic/GC/deadargelim.ll
index c5a56f6..1760190 100644
--- a/test/CodeGen/Generic/GC/deadargelim.ll
+++ b/test/CodeGen/Generic/GC/deadargelim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim
+; RUN: opt < %s -deadargelim
declare void @llvm.gcroot(i8**, i8*)
diff --git a/test/CodeGen/Generic/GC/frame_size.ll b/test/CodeGen/Generic/GC/frame_size.ll
index 75626c1..31783cd 100644
--- a/test/CodeGen/Generic/GC/frame_size.ll
+++ b/test/CodeGen/Generic/GC/frame_size.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -asm-verbose | grep {frame size} | grep -v 0x0
+; RUN: llc < %s -asm-verbose | grep {frame size} | grep -v 0x0
declare void @llvm.gcroot(i8** %value, i8* %tag)
declare void @g() gc "ocaml"
diff --git a/test/CodeGen/Generic/GC/inline.ll b/test/CodeGen/Generic/GC/inline.ll
index 157e19d..9da33ae 100644
--- a/test/CodeGen/Generic/GC/inline.ll
+++ b/test/CodeGen/Generic/GC/inline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep example
+; RUN: opt < %s -inline -S | grep example
%IntArray = type { i32, [0 x i32*] }
diff --git a/test/CodeGen/Generic/GC/inline2.ll b/test/CodeGen/Generic/GC/inline2.ll
index b45ef7c..1594705 100644
--- a/test/CodeGen/Generic/GC/inline2.ll
+++ b/test/CodeGen/Generic/GC/inline2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep sample
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep example
+; RUN: opt < %s -inline -S | grep sample
+; RUN: opt < %s -inline -S | grep example
%IntArray = type { i32, [0 x i32*] }
diff --git a/test/CodeGen/Generic/GC/lower_gcroot.ll b/test/CodeGen/Generic/GC/lower_gcroot.ll
index bd5a2bd..c2d418a 100644
--- a/test/CodeGen/Generic/GC/lower_gcroot.ll
+++ b/test/CodeGen/Generic/GC/lower_gcroot.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%Env = type i8*
diff --git a/test/CodeGen/Generic/GC/redundant_init.ll b/test/CodeGen/Generic/GC/redundant_init.ll
index 4499603..10c70e7 100644
--- a/test/CodeGen/Generic/GC/redundant_init.ll
+++ b/test/CodeGen/Generic/GC/redundant_init.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: ignore grep {movl..0} | count 0
%struct.obj = type { i8*, %struct.obj* }
diff --git a/test/CodeGen/Generic/GC/simple_ocaml.ll b/test/CodeGen/Generic/GC/simple_ocaml.ll
index a33e035..f765dc0 100644
--- a/test/CodeGen/Generic/GC/simple_ocaml.ll
+++ b/test/CodeGen/Generic/GC/simple_ocaml.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep caml.*__frametable
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl .0}
+; RUN: llc < %s | grep caml.*__frametable
+; RUN: llc < %s -march=x86 | grep {movl .0}
%struct.obj = type { i8*, %struct.obj* }
diff --git a/test/CodeGen/Generic/Makefile b/test/CodeGen/Generic/Makefile
index d228f69..26ebc31 100644
--- a/test/CodeGen/Generic/Makefile
+++ b/test/CodeGen/Generic/Makefile
@@ -1,10 +1,10 @@
# Makefile for running ad-hoc custom LLVM tests
#
%.bc: %.ll
- llvm-as -f $<
+ llvm-as $<
%.llc.s: %.bc
- llc -f $< -o $@
+ llc $< -o $@
%.gcc.s: %.c
gcc -O0 -S $< -o $@
diff --git a/test/CodeGen/Generic/SwitchLowering.ll b/test/CodeGen/Generic/SwitchLowering.ll
index 9fdfd8d..29a0e82 100644
--- a/test/CodeGen/Generic/SwitchLowering.ll
+++ b/test/CodeGen/Generic/SwitchLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
; PR964
define i8* @FindChar(i8* %CurPtr) {
diff --git a/test/CodeGen/Generic/add-with-overflow-24.ll b/test/CodeGen/Generic/add-with-overflow-24.ll
index debdeb2..63f5a22 100644
--- a/test/CodeGen/Generic/add-with-overflow-24.ll
+++ b/test/CodeGen/Generic/add-with-overflow-24.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/Generic/add-with-overflow.ll b/test/CodeGen/Generic/add-with-overflow.ll
index 5c3d540..0c2c960 100644
--- a/test/CodeGen/Generic/add-with-overflow.ll
+++ b/test/CodeGen/Generic/add-with-overflow.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llc < %s
+; RUN: llc < %s -fast-isel
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/Generic/addc-fold2.ll b/test/CodeGen/Generic/addc-fold2.ll
index 8f3cdd0..34f5ac1 100644
--- a/test/CodeGen/Generic/addc-fold2.ll
+++ b/test/CodeGen/Generic/addc-fold2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add
-; RUN: llvm-as < %s | llc -march=x86 | not grep adc
+; RUN: llc < %s -march=x86 | grep add
+; RUN: llc < %s -march=x86 | not grep adc
define i64 @test(i64 %A, i32 %B) {
%tmp12 = zext i32 %B to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/asm-large-immediate.ll b/test/CodeGen/Generic/asm-large-immediate.ll
index 7064913..605665b 100644
--- a/test/CodeGen/Generic/asm-large-immediate.ll
+++ b/test/CodeGen/Generic/asm-large-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep 68719476738
+; RUN: llc < %s | grep 68719476738
define void @test() {
entry:
diff --git a/test/CodeGen/Generic/badCallArgLRLLVM.ll b/test/CodeGen/Generic/badCallArgLRLLVM.ll
index 5638474..4ed88df 100644
--- a/test/CodeGen/Generic/badCallArgLRLLVM.ll
+++ b/test/CodeGen/Generic/badCallArgLRLLVM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This caused a problem because the argument of a call was defined by
; the return value of another call that appears later in the code.
diff --git a/test/CodeGen/Generic/badFoldGEP.ll b/test/CodeGen/Generic/badFoldGEP.ll
index 8de1251..2d4474b 100644
--- a/test/CodeGen/Generic/badFoldGEP.ll
+++ b/test/CodeGen/Generic/badFoldGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; GetMemInstArgs() folded the two getElementPtr instructions together,
;; producing an illegal getElementPtr. That's because the type generated
diff --git a/test/CodeGen/Generic/badarg6.ll b/test/CodeGen/Generic/badarg6.ll
index 1ff7df4..d6e5ac5 100644
--- a/test/CodeGen/Generic/badarg6.ll
+++ b/test/CodeGen/Generic/badarg6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; On this code, llc did not pass the sixth argument (%reg321) to printf.
; It passed the first five in %o0 - %o4, but never initialized %o5.
diff --git a/test/CodeGen/Generic/badlive.ll b/test/CodeGen/Generic/badlive.ll
index 0114fb0..43b03e3 100644
--- a/test/CodeGen/Generic/badlive.ll
+++ b/test/CodeGen/Generic/badlive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() {
bb0:
diff --git a/test/CodeGen/Generic/bool-to-double.ll b/test/CodeGen/Generic/bool-to-double.ll
index d6c9e52..81350a4 100644
--- a/test/CodeGen/Generic/bool-to-double.ll
+++ b/test/CodeGen/Generic/bool-to-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define double @test(i1 %X) {
%Y = uitofp i1 %X to double ; <double> [#uses=1]
ret double %Y
diff --git a/test/CodeGen/Generic/bool-vector.ll b/test/CodeGen/Generic/bool-vector.ll
index e0f2a708..4758697 100644
--- a/test/CodeGen/Generic/bool-vector.ll
+++ b/test/CodeGen/Generic/bool-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1845
define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) {
diff --git a/test/CodeGen/Generic/call-ret0.ll b/test/CodeGen/Generic/call-ret0.ll
index 7ab966b..a8e00cd 100644
--- a/test/CodeGen/Generic/call-ret0.ll
+++ b/test/CodeGen/Generic/call-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @foo(i32 %x) {
ret i32 %x
}
diff --git a/test/CodeGen/Generic/call-ret42.ll b/test/CodeGen/Generic/call-ret42.ll
index ac9bd92..95cc286 100644
--- a/test/CodeGen/Generic/call-ret42.ll
+++ b/test/CodeGen/Generic/call-ret42.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @foo(i32 %x) {
ret i32 42
diff --git a/test/CodeGen/Generic/call-void.ll b/test/CodeGen/Generic/call-void.ll
index b882689..9ed4179 100644
--- a/test/CodeGen/Generic/call-void.ll
+++ b/test/CodeGen/Generic/call-void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo() {
ret void
diff --git a/test/CodeGen/Generic/call2-ret0.ll b/test/CodeGen/Generic/call2-ret0.ll
index 8c7e892..4e57ef8 100644
--- a/test/CodeGen/Generic/call2-ret0.ll
+++ b/test/CodeGen/Generic/call2-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @bar(i32 %x) {
ret i32 0
diff --git a/test/CodeGen/Generic/cast-fp.ll b/test/CodeGen/Generic/cast-fp.ll
index 5f05d85..590b7ce 100644
--- a/test/CodeGen/Generic/cast-fp.ll
+++ b/test/CodeGen/Generic/cast-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_fstr = internal constant [8 x i8] c"a = %f\0A\00" ; <[8 x i8]*> [#uses=1]
@a_lstr = internal constant [10 x i8] c"a = %lld\0A\00" ; <[10 x i8]*> [#uses=1]
@a_dstr = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/constindices.ll b/test/CodeGen/Generic/constindices.ll
index 6366fd5..7deb30f 100644
--- a/test/CodeGen/Generic/constindices.ll
+++ b/test/CodeGen/Generic/constindices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that a sequence of constant indices are folded correctly
; into the equivalent offset at compile-time.
diff --git a/test/CodeGen/Generic/debug-info.ll b/test/CodeGen/Generic/debug-info.ll
index d1bb66d..20d9f91 100644
--- a/test/CodeGen/Generic/debug-info.ll
+++ b/test/CodeGen/Generic/debug-info.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%lldb.compile_unit = type { i32, i16, i16, i8*, i8*, i8*, { }* }
@d.compile_unit7 = external global %lldb.compile_unit ; <%lldb.compile_unit*> [#uses=1]
diff --git a/test/CodeGen/Generic/div-neg-power-2.ll b/test/CodeGen/Generic/div-neg-power-2.ll
index 3bc4899..246cd03 100644
--- a/test/CodeGen/Generic/div-neg-power-2.ll
+++ b/test/CodeGen/Generic/div-neg-power-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @test(i32 %X) {
%Y = sdiv i32 %X, -2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/empty-load-store.ll b/test/CodeGen/Generic/empty-load-store.ll
index d7bb371..bca7305 100644
--- a/test/CodeGen/Generic/empty-load-store.ll
+++ b/test/CodeGen/Generic/empty-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2612
@current_foo = internal global { } zeroinitializer
diff --git a/test/CodeGen/Generic/externally_available.ll b/test/CodeGen/Generic/externally_available.ll
index 73b6b98..7976cc9 100644
--- a/test/CodeGen/Generic/externally_available.ll
+++ b/test/CodeGen/Generic/externally_available.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep test_
+; RUN: llc < %s | not grep test_
; test_function should not be emitted to the .s file.
define available_externally i32 @test_function() {
diff --git a/test/CodeGen/Generic/fastcall.ll b/test/CodeGen/Generic/fastcall.ll
index 65e66c7..35e04f1 100644
--- a/test/CodeGen/Generic/fastcall.ll
+++ b/test/CodeGen/Generic/fastcall.ll
@@ -1,5 +1,5 @@
; Test fastcc works. Test from bug 2770.
-; RUN: llvm-as < %s | llc -relocation-model=pic
+; RUN: llc < %s -relocation-model=pic
%struct.__gcov_var = type { i32 }
diff --git a/test/CodeGen/Generic/fneg-fabs.ll b/test/CodeGen/Generic/fneg-fabs.ll
index 2709fa1..2f2f597 100644
--- a/test/CodeGen/Generic/fneg-fabs.ll
+++ b/test/CodeGen/Generic/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define double @fneg(double %X) {
%Y = fsub double -0.000000e+00, %X ; <double> [#uses=1]
diff --git a/test/CodeGen/Generic/fp-to-int-invalid.ll b/test/CodeGen/Generic/fp-to-int-invalid.ll
index 73176b1..cdcc3a2 100644
--- a/test/CodeGen/Generic/fp-to-int-invalid.ll
+++ b/test/CodeGen/Generic/fp-to-int-invalid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4057
define void @test_cast_float_to_char(i8* %result) nounwind {
entry:
diff --git a/test/CodeGen/Generic/fp_to_int.ll b/test/CodeGen/Generic/fp_to_int.ll
index 609de65..ad94413 100644
--- a/test/CodeGen/Generic/fp_to_int.ll
+++ b/test/CodeGen/Generic/fp_to_int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i8 @test1(double %X) {
%tmp.1 = fptosi double %X to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/fpowi-promote.ll b/test/CodeGen/Generic/fpowi-promote.ll
index 55c2d2a..82628ef 100644
--- a/test/CodeGen/Generic/fpowi-promote.ll
+++ b/test/CodeGen/Generic/fpowi-promote.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386
+; RUN: llc < %s
+; RUN: llc < %s -march=x86 -mcpu=i386
; PR1239
diff --git a/test/CodeGen/Generic/fwdtwice.ll b/test/CodeGen/Generic/fwdtwice.ll
index 05e831a..6b38f04 100644
--- a/test/CodeGen/Generic/fwdtwice.ll
+++ b/test/CodeGen/Generic/fwdtwice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;;
;; Test the sequence:
diff --git a/test/CodeGen/Generic/getresult-undef.ll b/test/CodeGen/Generic/getresult-undef.ll
index 7905ff5..c675535 100644
--- a/test/CodeGen/Generic/getresult-undef.ll
+++ b/test/CodeGen/Generic/getresult-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define double @foo() {
%t = getresult {double, double} undef, 1
diff --git a/test/CodeGen/Generic/global-ret0.ll b/test/CodeGen/Generic/global-ret0.ll
index 8fcef33..74bff87 100644
--- a/test/CodeGen/Generic/global-ret0.ll
+++ b/test/CodeGen/Generic/global-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@g = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/hello.ll b/test/CodeGen/Generic/hello.ll
index 705423f..705945c 100644
--- a/test/CodeGen/Generic/hello.ll
+++ b/test/CodeGen/Generic/hello.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [7 x i8] c"hello\0A\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/i128-addsub.ll b/test/CodeGen/Generic/i128-addsub.ll
index 10f0acc..e7cbf4a 100644
--- a/test/CodeGen/Generic/i128-addsub.ll
+++ b/test/CodeGen/Generic/i128-addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
entry:
diff --git a/test/CodeGen/Generic/i128-arith.ll b/test/CodeGen/Generic/i128-arith.ll
index 9a67084..cf10463 100644
--- a/test/CodeGen/Generic/i128-arith.ll
+++ b/test/CodeGen/Generic/i128-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i64 @foo(i64 %x, i64 %y, i32 %amt) {
%tmp0 = zext i64 %x to i128
diff --git a/test/CodeGen/Generic/inline-asm-special-strings.ll b/test/CodeGen/Generic/inline-asm-special-strings.ll
index e52e0be..d18221e 100644
--- a/test/CodeGen/Generic/inline-asm-special-strings.ll
+++ b/test/CodeGen/Generic/inline-asm-special-strings.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "foo 0 0"
+; RUN: llc < %s | grep "foo 0 0"
define void @bar() nounwind {
tail call void asm sideeffect "foo ${:uid} ${:uid}", ""() nounwind
diff --git a/test/CodeGen/Generic/intrinsics.ll b/test/CodeGen/Generic/intrinsics.ll
index 373bec9..9a42c3e 100644
--- a/test/CodeGen/Generic/intrinsics.ll
+++ b/test/CodeGen/Generic/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
;; SQRT
declare float @llvm.sqrt.f32(float)
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index e3acf0c..8448565 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; This testcase is invalid (the alignment specified for memcpy is
; greater than the alignment guaranteed for Qux or C.0.1173), but it
diff --git a/test/CodeGen/Generic/isunord.ll b/test/CodeGen/Generic/isunord.ll
index 589f496..ebbba01 100644
--- a/test/CodeGen/Generic/isunord.ll
+++ b/test/CodeGen/Generic/isunord.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc
-; XFAIL: ia64
+; RUN: llc < %s
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 66f409e..1db7549 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -1,5 +1,5 @@
; Make sure this testcase is supported by all code generators
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index f21b645..282e973 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare { i64, double } @wild()
diff --git a/test/CodeGen/Generic/negintconst.ll b/test/CodeGen/Generic/negintconst.ll
index a2b3d69..67d775e 100644
--- a/test/CodeGen/Generic/negintconst.ll
+++ b/test/CodeGen/Generic/negintconst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Test that a negative constant smaller than 64 bits (e.g., int)
; is correctly implemented with sign-extension.
diff --git a/test/CodeGen/Generic/nested-select.ll b/test/CodeGen/Generic/nested-select.ll
index a723a4d..f81fed3 100644
--- a/test/CodeGen/Generic/nested-select.ll
+++ b/test/CodeGen/Generic/nested-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o /dev/null -f
+; RUN: llc < %s -o /dev/null
; Test that select of a select works
diff --git a/test/CodeGen/Generic/phi-immediate-factoring.ll b/test/CodeGen/Generic/phi-immediate-factoring.ll
index e0f6759..9f9f921 100644
--- a/test/CodeGen/Generic/phi-immediate-factoring.ll
+++ b/test/CodeGen/Generic/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
; PR1296
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl \$1} | count 1
+; RUN: llc < %s -march=x86 | grep {movl \$1} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/Generic/pr2625.ll b/test/CodeGen/Generic/pr2625.ll
index c1f585d..3e3dc4b 100644
--- a/test/CodeGen/Generic/pr2625.ll
+++ b/test/CodeGen/Generic/pr2625.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2625
define i32 @main({ i32, { i32 } }*) {
diff --git a/test/CodeGen/Generic/pr3288.ll b/test/CodeGen/Generic/pr3288.ll
index ff0384d..b62710f 100644
--- a/test/CodeGen/Generic/pr3288.ll
+++ b/test/CodeGen/Generic/pr3288.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3288
define void @a() {
diff --git a/test/CodeGen/Generic/print-add.ll b/test/CodeGen/Generic/print-add.ll
index 4f1cb5e..95608dc 100644
--- a/test/CodeGen/Generic/print-add.ll
+++ b/test/CodeGen/Generic/print-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index 1e27061..d129ff8 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %f\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %f\0A\00" ; <[8 x i8]*> [#uses=1]
@add_str = internal constant [12 x i8] c"a + b = %f\0A\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-arith-int.ll b/test/CodeGen/Generic/print-arith-int.ll
index cf27515..ce938cf 100644
--- a/test/CodeGen/Generic/print-arith-int.ll
+++ b/test/CodeGen/Generic/print-arith-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@add_str = internal constant [12 x i8] c"a + b = %d\0A\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-int.ll b/test/CodeGen/Generic/print-int.ll
index 58f5047..7ca4b3d 100644
--- a/test/CodeGen/Generic/print-int.ll
+++ b/test/CodeGen/Generic/print-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@.str_1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-mul-exp.ll b/test/CodeGen/Generic/print-mul-exp.ll
index 0666775..90fc55b 100644
--- a/test/CodeGen/Generic/print-mul-exp.ll
+++ b/test/CodeGen/Generic/print-mul-exp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@a_mul_str = internal constant [13 x i8] c"a * %d = %d\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-mul.ll b/test/CodeGen/Generic/print-mul.ll
index 1d9452a..0707f3c 100644
--- a/test/CodeGen/Generic/print-mul.ll
+++ b/test/CodeGen/Generic/print-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-shift.ll b/test/CodeGen/Generic/print-shift.ll
index 8992e8d..6c5d222 100644
--- a/test/CodeGen/Generic/print-shift.ll
+++ b/test/CodeGen/Generic/print-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1]
@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/ret0.ll b/test/CodeGen/Generic/ret0.ll
index 489f31c..9e628a1 100644
--- a/test/CodeGen/Generic/ret0.ll
+++ b/test/CodeGen/Generic/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() {
ret i32 0
diff --git a/test/CodeGen/Generic/ret42.ll b/test/CodeGen/Generic/ret42.ll
index 0cbe176..f5cd33d 100644
--- a/test/CodeGen/Generic/ret42.ll
+++ b/test/CodeGen/Generic/ret42.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @main() {
ret i32 42
diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll
index 85e68d1..b653e2a 100644
--- a/test/CodeGen/Generic/select-cc.ll
+++ b/test/CodeGen/Generic/select-cc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2504
define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind {
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index a532703..63052c1 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%Domain = type { i8*, i32, i32*, i32, i32, i32*, %Domain* }
@AConst = constant i32 123 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/shift-int64.ll b/test/CodeGen/Generic/shift-int64.ll
index 31be2d6..670ef20 100644
--- a/test/CodeGen/Generic/shift-int64.ll
+++ b/test/CodeGen/Generic/shift-int64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i64 @test_imm(i64 %X) {
%Y = ashr i64 %X, 17 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/spillccr.ll b/test/CodeGen/Generic/spillccr.ll
index 8545133..0a774c6 100644
--- a/test/CodeGen/Generic/spillccr.ll
+++ b/test/CodeGen/Generic/spillccr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc
+; RUN: llc < %s
; July 6, 2002 -- LLC Regression test
; This test case checks if the integer CC register %xcc (or %ccr)
diff --git a/test/CodeGen/Generic/stack-protector.ll b/test/CodeGen/Generic/stack-protector.ll
index a11a714..a59c649 100644
--- a/test/CodeGen/Generic/stack-protector.ll
+++ b/test/CodeGen/Generic/stack-protector.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -o - | grep {__stack_chk_guard}
-; RUN: llvm-as < %s | llc -o - | grep {__stack_chk_fail}
+; RUN: llc < %s -o - | grep {__stack_chk_guard}
+; RUN: llc < %s -o - | grep {__stack_chk_fail}
@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/stacksave-restore.ll b/test/CodeGen/Generic/stacksave-restore.ll
index fd3dd67..b124b5f 100644
--- a/test/CodeGen/Generic/stacksave-restore.ll
+++ b/test/CodeGen/Generic/stacksave-restore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/Generic/storetrunc-fp.ll b/test/CodeGen/Generic/storetrunc-fp.ll
index 0f7bb0b..7f7c7f7 100644
--- a/test/CodeGen/Generic/storetrunc-fp.ll
+++ b/test/CodeGen/Generic/storetrunc-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @foo(double %a, double %b, float* %fp) {
%c = fadd double %a, %b
diff --git a/test/CodeGen/Generic/switch-crit-edge-constant.ll b/test/CodeGen/Generic/switch-crit-edge-constant.ll
index d71fe56..1f2ab0d 100644
--- a/test/CodeGen/Generic/switch-crit-edge-constant.ll
+++ b/test/CodeGen/Generic/switch-crit-edge-constant.ll
@@ -1,5 +1,5 @@
; PR925
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep mov.*str1 | count 1
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/Generic/switch-lower-feature-2.ll b/test/CodeGen/Generic/switch-lower-feature-2.ll
index 5e532a8..d6e5647 100644
--- a/test/CodeGen/Generic/switch-lower-feature-2.ll
+++ b/test/CodeGen/Generic/switch-lower-feature-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t -f
+; RUN: llc < %s -march=x86 -o %t
; RUN: grep jb %t | count 1
; RUN: grep \\\$6 %t | count 2
; RUN: grep 1024 %t | count 1
diff --git a/test/CodeGen/Generic/switch-lower-feature.ll b/test/CodeGen/Generic/switch-lower-feature.ll
index 0523401..65fdf5a 100644
--- a/test/CodeGen/Generic/switch-lower-feature.ll
+++ b/test/CodeGen/Generic/switch-lower-feature.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep {\$7} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep {\$6} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep 1024 | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep jb | count 2
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep je | count 1
+; RUN: llc < %s -march=x86 -o - | grep {\$7} | count 1
+; RUN: llc < %s -march=x86 -o - | grep {\$6} | count 1
+; RUN: llc < %s -march=x86 -o - | grep 1024 | count 1
+; RUN: llc < %s -march=x86 -o - | grep jb | count 2
+; RUN: llc < %s -march=x86 -o - | grep je | count 1
define i32 @main(i32 %tmp158) {
entry:
diff --git a/test/CodeGen/Generic/switch-lower.ll b/test/CodeGen/Generic/switch-lower.ll
index b1aad3f..eb240ed 100644
--- a/test/CodeGen/Generic/switch-lower.ll
+++ b/test/CodeGen/Generic/switch-lower.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1197
diff --git a/test/CodeGen/Generic/trap.ll b/test/CodeGen/Generic/trap.ll
index 4dfc1a6..67d1a7a 100644
--- a/test/CodeGen/Generic/trap.ll
+++ b/test/CodeGen/Generic/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define i32 @test() noreturn nounwind {
entry:
tail call void @llvm.trap( )
diff --git a/test/CodeGen/Generic/v-split.ll b/test/CodeGen/Generic/v-split.ll
index 44601d0..634b562 100644
--- a/test/CodeGen/Generic/v-split.ll
+++ b/test/CodeGen/Generic/v-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%f8 = type <8 x float>
define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
diff --git a/test/CodeGen/Generic/vector-casts.ll b/test/CodeGen/Generic/vector-casts.ll
index 12104a3..a26918b 100644
--- a/test/CodeGen/Generic/vector-casts.ll
+++ b/test/CodeGen/Generic/vector-casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2671
define void @a(<2 x double>* %p, <2 x i8>* %q) {
diff --git a/test/CodeGen/Generic/vector-constantexpr.ll b/test/CodeGen/Generic/vector-constantexpr.ll
index 441c4a0..d8e0258 100644
--- a/test/CodeGen/Generic/vector-constantexpr.ll
+++ b/test/CodeGen/Generic/vector-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @""(float* %inregs, float* %outregs) {
%a_addr.i = alloca <4 x float> ; <<4 x float>*> [#uses=1]
diff --git a/test/CodeGen/Generic/vector-identity-shuffle.ll b/test/CodeGen/Generic/vector-identity-shuffle.ll
index 61b44af..332d6d8 100644
--- a/test/CodeGen/Generic/vector-identity-shuffle.ll
+++ b/test/CodeGen/Generic/vector-identity-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @test(<4 x float>* %tmp2.i) {
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index f283256..a0f9a02 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -1,5 +1,5 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%d8 = type <8 x double>
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index 245c2f9..f339373 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR4136
target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
diff --git a/test/CodeGen/MSP430/2009-05-17-Rot.ll b/test/CodeGen/MSP430/2009-05-17-Rot.ll
index c25a906..2ae0052 100644
--- a/test/CodeGen/MSP430/2009-05-17-Rot.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430
+; RUN: llc < %s -march=msp430
define i16 @rol1u16(i16 %x.arg) nounwind {
%retval = alloca i16
diff --git a/test/CodeGen/MSP430/2009-05-17-Shift.ll b/test/CodeGen/MSP430/2009-05-17-Shift.ll
index b048bb3..25aff60 100644
--- a/test/CodeGen/MSP430/2009-05-17-Shift.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430 | grep rra | count 1
+; RUN: llc < %s -march=msp430 | grep rra | count 1
define i16 @lsr2u16(i16 %x.arg) nounwind {
%retval = alloca i16
diff --git a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
index 70f1d99..54eb7ff 100644
--- a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
+++ b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430
+; RUN: llc < %s -march=msp430
define i16 @test(double %d) nounwind {
entry:
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
new file mode 100644
index 0000000..088d3e1
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s
+; PR4769
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i16 @foo() nounwind readnone {
+entry:
+ %result = alloca i16, align 1 ; <i16*> [#uses=2]
+ volatile store i16 0, i16* %result
+ %tmp = volatile load i16* %result ; <i16> [#uses=1]
+ ret i16 %tmp
+}
+
+define i16 @main() nounwind {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %call = call i16 @bar() nounwind ; <i16> [#uses=1]
+ %tobool = icmp eq i16 %call, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %result.i = alloca i16, align 1 ; <i16*> [#uses=2]
+ volatile store i16 0, i16* %result.i
+ %tmp.i = volatile load i16* %result.i ; <i16> [#uses=0]
+ ret i16 0
+}
+
+declare i16 @bar()
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
new file mode 100644
index 0000000..cc574c7
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep 0x0021 | count 2
+; PR4776
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-unknown-unknown"
+
+@"\010x0021" = common global i8 0, align 1 ; <i8*> [#uses=2]
+
+define zeroext i8 @foo(i8 zeroext %x) nounwind {
+entry:
+ %retval = alloca i8 ; <i8*> [#uses=2]
+ %x.addr = alloca i8 ; <i8*> [#uses=2]
+ %tmp = alloca i8, align 1 ; <i8*> [#uses=2]
+ store i8 %x, i8* %x.addr
+ %tmp1 = volatile load i8* @"\010x0021" ; <i8> [#uses=1]
+ store i8 %tmp1, i8* %tmp
+ %tmp2 = load i8* %x.addr ; <i8> [#uses=1]
+ volatile store i8 %tmp2, i8* @"\010x0021"
+ %tmp3 = load i8* %tmp ; <i8> [#uses=1]
+ store i8 %tmp3, i8* %retval
+ %0 = load i8* %retval ; <i8> [#uses=1]
+ ret i8 %0
+}
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
new file mode 100644
index 0000000..856eb9d
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=msp430 < %s
+; PR4779
+define void @foo() nounwind {
+entry:
+ %r = alloca i8 ; <i8*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ volatile load i8* %r, align 1 ; <i8>:0 [#uses=1]
+ or i8 %0, 1 ; <i8>:1 [#uses=1]
+ volatile store i8 %1, i8* %r, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/MSP430/Inst16mi.ll b/test/CodeGen/MSP430/Inst16mi.ll
new file mode 100644
index 0000000..33d7aa4
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mi.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.w #2, &foo
+ store i16 2, i16 * @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.w #2, &foo
+ %1 = load i16* @foo
+ %2 = add i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.w #2, &foo
+ %1 = load i16* @foo
+ %2 = and i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.w #2, &foo
+ %1 = load i16* @foo
+ %2 = or i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.w #2, &foo
+ %1 = load i16* @foo
+ %2 = xor i16 %1, 2
+ store i16 %2, i16 * @foo
+ ret void
+}
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
new file mode 100644
index 0000000..510afe3
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+@bar = common global i16 0, align 2
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.w &bar, &foo
+ %1 = load i16* @bar
+ store i16 %1, i16* @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = add i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = and i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = or i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.w &bar, &foo
+ %1 = load i16* @bar
+ %2 = load i16* @foo
+ %3 = xor i16 %2, %1
+ store i16 %3, i16* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
new file mode 100644
index 0000000..53334aa
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define void @mov(i16 %a) nounwind {
+; CHECK: mov:
+; CHECK: mov.w r15, &foo
+ store i16 %a, i16* @foo
+ ret void
+}
+
+define void @add(i16 %a) nounwind {
+; CHECK: add:
+; CHECK: add.w r15, &foo
+ %1 = load i16* @foo
+ %2 = add i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
+define void @and(i16 %a) nounwind {
+; CHECK: and:
+; CHECK: and.w r15, &foo
+ %1 = load i16* @foo
+ %2 = and i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
+define void @bis(i16 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.w r15, &foo
+ %1 = load i16* @foo
+ %2 = or i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
+define void @xor(i16 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.w r15, &foo
+ %1 = load i16* @foo
+ %2 = xor i16 %a, %1
+ store i16 %2, i16* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
new file mode 100644
index 0000000..d0cb0d1
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define i16 @add(i16 %a) nounwind {
+; CHECK: add:
+; CHECK: add.w &foo, r15
+ %1 = load i16* @foo
+ %2 = add i16 %a, %1
+ ret i16 %2
+}
+
+define i16 @and(i16 %a) nounwind {
+; CHECK: and:
+; CHECK: and.w &foo, r15
+ %1 = load i16* @foo
+ %2 = and i16 %a, %1
+ ret i16 %2
+}
+
+
+define i16 @bis(i16 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.w &foo, r15
+ %1 = load i16* @foo
+ %2 = or i16 %a, %1
+ ret i16 %2
+}
+
+define i16 @xor(i16 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.w &foo, r15
+ %1 = load i16* @foo
+ %2 = xor i16 %a, %1
+ ret i16 %2
+}
+
diff --git a/test/CodeGen/MSP430/Inst16rr.ll b/test/CodeGen/MSP430/Inst16rr.ll
new file mode 100644
index 0000000..6619c51
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16rr.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i16 @mov(i16 %a, i16 %b) nounwind {
+; CHECK: mov:
+; CHECK: mov.w r14, r15
+ ret i16 %b
+}
+
+define i16 @add(i16 %a, i16 %b) nounwind {
+; CHECK: add:
+; CHECK: add.w r14, r15
+ %1 = add i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @and(i16 %a, i16 %b) nounwind {
+; CHECK: and:
+; CHECK: and.w r14, r15
+ %1 = and i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @bis(i16 %a, i16 %b) nounwind {
+; CHECK: bis:
+; CHECK: bis.w r14, r15
+ %1 = or i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @xor(i16 %a, i16 %b) nounwind {
+; CHECK: xor:
+; CHECK: xor.w r14, r15
+ %1 = xor i16 %a, %b
+ ret i16 %1
+}
diff --git a/test/CodeGen/MSP430/Inst8mi.ll b/test/CodeGen/MSP430/Inst8mi.ll
new file mode 100644
index 0000000..ef318ce
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mi.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.b #2, &foo
+ store i8 2, i8 * @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.b #2, &foo
+ %1 = load i8* @foo
+ %2 = add i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.b #2, &foo
+ %1 = load i8* @foo
+ %2 = and i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.b #2, &foo
+ %1 = load i8* @foo
+ %2 = or i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.b #2, &foo
+ %1 = load i8* @foo
+ %2 = xor i8 %1, 2
+ store i8 %2, i8 * @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8mm.ll b/test/CodeGen/MSP430/Inst8mm.ll
new file mode 100644
index 0000000..a2987ac
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mm.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+@foo = common global i8 0, align 1
+@bar = common global i8 0, align 1
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.b &bar, &foo
+ %1 = load i8* @bar
+ store i8 %1, i8* @foo
+ ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = add i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = and i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = or i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.b &bar, &foo
+ %1 = load i8* @bar
+ %2 = load i8* @foo
+ %3 = xor i8 %2, %1
+ store i8 %3, i8* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
new file mode 100644
index 0000000..04c681e
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define void @mov(i8 %a) nounwind {
+; CHECK: mov:
+; CHECK: mov.b r15, &foo
+ store i8 %a, i8* @foo
+ ret void
+}
+
+define void @and(i8 %a) nounwind {
+; CHECK: and:
+; CHECK: and.b r15, &foo
+ %1 = load i8* @foo
+ %2 = and i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
+define void @add(i8 %a) nounwind {
+; CHECK: add:
+; CHECK: add.b r15, &foo
+ %1 = load i8* @foo
+ %2 = add i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
+define void @bis(i8 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.b r15, &foo
+ %1 = load i8* @foo
+ %2 = or i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
+define void @xor(i8 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.b r15, &foo
+ %1 = load i8* @foo
+ %2 = xor i8 %a, %1
+ store i8 %2, i8* @foo
+ ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
new file mode 100644
index 0000000..62a5d4b
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define i8 @add(i8 %a) nounwind {
+; CHECK: add:
+; CHECK: add.b &foo, r15
+ %1 = load i8* @foo
+ %2 = add i8 %a, %1
+ ret i8 %2
+}
+
+define i8 @and(i8 %a) nounwind {
+; CHECK: and:
+; CHECK: and.b &foo, r15
+ %1 = load i8* @foo
+ %2 = and i8 %a, %1
+ ret i8 %2
+}
+
+
+define i8 @bis(i8 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.b &foo, r15
+ %1 = load i8* @foo
+ %2 = or i8 %a, %1
+ ret i8 %2
+}
+
+define i8 @xor(i8 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.b &foo, r15
+ %1 = load i8* @foo
+ %2 = xor i8 %a, %1
+ ret i8 %2
+}
+
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
new file mode 100644
index 0000000..90ea945
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i8 @mov(i8 %a, i8 %b) nounwind {
+; CHECK: mov:
+; CHECK: mov.b r14, r15
+ ret i8 %b
+}
+
+define i8 @add(i8 %a, i8 %b) nounwind {
+; CHECK: add:
+; CHECK: add.b r14, r15
+ %1 = add i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @and(i8 %a, i8 %b) nounwind {
+; CHECK: and:
+; CHECK: and.w r14, r15
+ %1 = and i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @bis(i8 %a, i8 %b) nounwind {
+; CHECK: bis:
+; CHECK: bis.w r14, r15
+ %1 = or i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @xor(i8 %a, i8 %b) nounwind {
+; CHECK: xor:
+; CHECK: xor.w r14, r15
+ %1 = xor i8 %a, %b
+ ret i8 %1
+}
+
diff --git a/test/CodeGen/MSP430/inline-asm.ll b/test/CodeGen/MSP430/inline-asm.ll
new file mode 100644
index 0000000..2cc25a4
--- /dev/null
+++ b/test/CodeGen/MSP430/inline-asm.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define void @imm() nounwind {
+ call void asm sideeffect "bic\09$0,r2", "i"(i16 32) nounwind
+ ret void
+}
+
+define void @reg(i16 %a) nounwind {
+ call void asm sideeffect "bic\09$0,r2", "r"(i16 %a) nounwind
+ ret void
+}
+
+@foo = global i16 0, align 2
+
+define void @immmem() nounwind {
+ call void asm sideeffect "bic\09$0,r2", "i"(i16* getelementptr(i16* @foo, i32 1)) nounwind
+ ret void
+}
+
+define void @mem() nounwind {
+ call void asm sideeffect "bic\09$0,r2", "m"(i16* @foo) nounwind
+ ret void
+}
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9cd7c80..8e7b70e 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep subu %t | count 2
; RUN: grep addu %t | count 4
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index 53ceaf3..b2aaa00 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll
index 2d1101a..6bb6bd8 100644
--- a/test/CodeGen/Mips/2008-07-05-ByVal.ll
+++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {lw.*(\$4)} | count 2
+; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll
index f8eca85..808ce16 100644
--- a/test/CodeGen/Mips/2008-07-06-fadd64.ll
+++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __adddf3
+; RUN: llc < %s -march=mips | grep __adddf3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
index e0e7d76..7ac0f5f 100644
--- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll
+++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __extendsfdf2
+; RUN: llc < %s -march=mips | grep __extendsfdf2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index f2f0374..ca99636 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep trunc.w.s | count 3
+; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
index ab6a9c8..20de18a 100644
--- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
+++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep __floatsidf %t | count 1
; RUN: grep __floatunsidf %t | count 1
; RUN: grep __fixdfsi %t | count 1
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index 4d218cf..f6b2045 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {rodata.str1.4,"aMS",@progbits} %t | count 1
; RUN: grep {r.data,} %t | count 1
; RUN: grep {\%hi} %t | count 2
diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index 0e3f864..26eb4db 100644
--- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mips-ssection-threshold=8 -march=mips -f -o %t0
-; RUN: llvm-as < %s | llc -mips-ssection-threshold=0 -march=mips -f -o %t1
+; RUN: llc < %s -mips-ssection-threshold=8 -march=mips -o %t0
+; RUN: llc < %s -mips-ssection-threshold=0 -march=mips -o %t1
; RUN: grep {sdata} %t0 | count 1
; RUN: grep {sbss} %t0 | count 1
; RUN: grep {gp_rel} %t0 | count 2
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index fc03bb5..59599b3 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep seh %t | count 1
; RUN: grep seb %t | count 1
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 2af7ab1..21ff960 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 4580215..80101fa 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {c\\..*\\.s} %t | count 3
; RUN: grep {bc1\[tf\]} %t | count 3
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index 5d03a19..042cad6 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {b\[ne\]\[eq\]} | count 1
+; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
index de11ac7..77680bc 100644
--- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll
+++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep abs.s %t | count 1
; RUN: grep neg.s %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index fea5730..cd35cca 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep mfhi %t | count 1
; RUN: grep mflo %t | count 1
; RUN: grep multu %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index b1e999c..c41d521 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -1,5 +1,5 @@
; Double return in abicall (default)
-; RUN: llvm-as < %s | llc -march=mips
+; RUN: llc < %s -march=mips
; PR2615
define double @main(...) {
diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll
index 9d18f47..2f33e9b 100644
--- a/test/CodeGen/Mips/2008-08-03-fabs64.ll
+++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep {lui.*32767} %t | count 1
; RUN: grep {ori.*65535} %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f7a64c3..ca90b50 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
; RUN: grep mtc1 %t | count 1
; RUN: grep mfc1 %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 34596ea..79e49a3 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {subu.*sp} | count 2
+; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll
index e276f5e..54d454c 100644
--- a/test/CodeGen/Mips/2008-08-07-CC.ll
+++ b/test/CodeGen/Mips/2008-08-07-CC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f
+; RUN: llc < %s -march=mips
; Mips must ignore fastcc
target datalayout =
diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll
index fd41ff1..f3bb965 100644
--- a/test/CodeGen/Mips/2008-08-07-FPRound.ll
+++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __truncdfsf2 | count 1
+; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-08-08-bswap.ll b/test/CodeGen/Mips/2008-08-08-bswap.ll
index 71c2b85..83289d9 100644
--- a/test/CodeGen/Mips/2008-08-08-bswap.ll
+++ b/test/CodeGen/Mips/2008-08-08-bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep wsbw | count 1
+; RUN: llc < %s | grep wsbw | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "psp"
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index e468b6d..1da1db2 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep clz | count 1
+; RUN: llc < %s -march=mips | grep clz | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
index 783850a..18f5b3d 100644
--- a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
+++ b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips
+; RUN: llc < %s -march=mips
; PR2794
define i32 @main(i8*) nounwind {
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index 1f7440a..f518843 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2667
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "psp"
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 00a969d..a1b45c2 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=mips > %t
+; RUN: llc < %s -march=mips > %t
; RUN: grep \\\$foo: %t
; RUN: grep call.*\\\$foo %t
; RUN: grep \\\$baz: %t
diff --git a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
new file mode 100644
index 0000000..d7072dd
--- /dev/null
+++ b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=pic16 | grep {movf \\+@i + 0, \\+W}
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-f32:32:32"
+target triple = "pic16-"
+@i = global i32 -10, align 1 ; <i32*> [#uses=1]
+@j = global i32 -20, align 1 ; <i32*> [#uses=1]
+@pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=3]
+@main.auto.k = internal global i32 0 ; <i32*> [#uses=2]
+
+define void @main() nounwind {
+entry:
+ %tmp = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32* @j ; <i32> [#uses=1]
+ %add = add i32 %tmp, %tmp1 ; <i32> [#uses=1]
+ store i32 %add, i32* @main.auto.k
+ %tmp2 = load i32* @main.auto.k ; <i32> [#uses=1]
+ %add3 = add i32 %tmp2, 32 ; <i32> [#uses=1]
+ %conv = trunc i32 %add3 to i8 ; <i8> [#uses=1]
+ %tmp4 = load i8** @pc ; <i8*> [#uses=1]
+ store i8 %conv, i8* %tmp4
+ %tmp5 = load i8** @pc ; <i8*> [#uses=1]
+ %tmp6 = load i8* %tmp5 ; <i8> [#uses=1]
+ %conv7 = sext i8 %tmp6 to i16 ; <i16> [#uses=1]
+ %sub = sub i16 %conv7, 1 ; <i16> [#uses=1]
+ %conv8 = trunc i16 %sub to i8 ; <i8> [#uses=1]
+ %tmp9 = load i8** @pc ; <i8*> [#uses=1]
+ store i8 %conv8, i8* %tmp9
+ ret void
+}
diff --git a/test/CodeGen/PIC16/dg.exp b/test/CodeGen/PIC16/dg.exp
new file mode 100644
index 0000000..b08b985
--- /dev/null
+++ b/test/CodeGen/PIC16/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target PIC16] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/PIC16/global-in-user-section.ll b/test/CodeGen/PIC16/global-in-user-section.ll
new file mode 100644
index 0000000..74c9d9d
--- /dev/null
+++ b/test/CodeGen/PIC16/global-in-user-section.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=pic16 | FileCheck %s
+
+@G1 = common global i16 0, section "usersection", align 1
+; CHECK: usersection UDATA
+; CHECK: @G1 RES 2
diff --git a/test/CodeGen/PIC16/globals.ll b/test/CodeGen/PIC16/globals.ll
new file mode 100644
index 0000000..959eb25
--- /dev/null
+++ b/test/CodeGen/PIC16/globals.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=pic16 | FileCheck %s
+
+@G1 = global i32 4712, section "Address=412"
+; CHECK: @G1.412.idata.0.# IDATA 412
+; CHECK: @G1 dl 4712
+
+@G2 = global i32 0, section "Address=412"
+; CHECK: @G2.412.udata.0.# UDATA 412
+; CHECK: @G2 RES 4
+
+@G3 = addrspace(1) constant i32 4712, section "Address=412"
+; CHECK: @G3.412.romdata.1.# ROMDATA 412
+; CHECK: @G3 rom_dl 4712
+
+
diff --git a/test/CodeGen/PIC16/sext.ll b/test/CodeGen/PIC16/sext.ll
new file mode 100644
index 0000000..b49925f
--- /dev/null
+++ b/test/CodeGen/PIC16/sext.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=pic16
+
+@main.auto.c = internal global i8 0 ; <i8*> [#uses=1]
+
+define i16 @main() nounwind {
+entry:
+ %tmp = load i8* @main.auto.c ; <i8> [#uses=1]
+ %conv = sext i8 %tmp to i16 ; <i16> [#uses=1]
+ ret i16 %conv
+}
diff --git a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
index 70f294a..f95465c 100644
--- a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
+++ b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @test() {
%tr1 = lshr i32 1, 0 ; <i32> [#uses=0]
ret void
diff --git a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
index 93a9123..c3bfa49 100644
--- a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
+++ b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @main() {
%tr4 = shl i64 1, 0 ; <i64> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
index 1a1aca4..dea654a 100644
--- a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
+++ b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @main() {
%shamt = add i8 0, 1 ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
index 3e490b1..fc190a48 100644
--- a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
+++ b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep .comm.*X,0
+; RUN: llc < %s -march=ppc32 | not grep .comm.*X,0
@X = linkonce global { } zeroinitializer ; <{ }*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
index f84caaf..ad02ece 100644
--- a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i32 @main() {
%setle = icmp sle i64 1, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
index 7b3e9b4..671bf80 100644
--- a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
+++ b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i64 @test() {
ret i64 undef
diff --git a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
index 8e8fee2..95012c3 100644
--- a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
@@ -1,6 +1,6 @@
; this should not crash the ppc backend
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i32 @test(i32 %j.0.0.i) {
diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
index 428dd0c..5d1df46 100644
--- a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
+++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -1,6 +1,6 @@
; This function should have exactly one call to fixdfdi, no more!
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
; RUN: grep {bl .*fixdfdi} | count 1
define double @test2(double %tmp.7705) {
diff --git a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
index 54f24c6..8a5d3b0 100644
--- a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
+++ b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
@@ -1,7 +1,7 @@
; This was erroneously being turned into an rlwinm instruction.
; The sign bit does matter in this case.
-; RUN: llvm-as < %s | llc -march=ppc32 | grep srawi
+; RUN: llc < %s -march=ppc32 | grep srawi
define i32 @test(i32 %X) {
%Y = and i32 %X, -2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
index d56cffc..047a12b 100644
--- a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
+++ b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index 1b3bde8..97bb48e 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep {, f1}
+; RUN: llc < %s | not grep {, f1}
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
index 86ad718..fbf2540 100644
--- a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
+++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define void @iterative_hash_host_wide_int() {
%zero = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
index 8500260..172e348 100644
--- a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
+++ b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define double @CalcSpeed(float %tmp127) {
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index a536fa1..969772e 100644
--- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
; RUN: grep {vspltish v.*, 10}
define void @test(<8 x i16>* %P) {
diff --git a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
index b79cce2..d225664 100644
--- a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
+++ b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; END.
define void @test(i8* %stack) {
diff --git a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
index e1033c3..0205d10 100644
--- a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; END.
%struct.attr_desc = type { i8*, %struct.attr_desc*, %struct.attr_value*, %struct.attr_value*, i32 }
diff --git a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
index 33807ca..1b8b064 100644
--- a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
+++ b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin | grep extsw | count 2
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep extsw | count 2
@lens = external global i8* ; <i8**> [#uses=1]
@vals = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
index c25cf21..65dd568 100644
--- a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
+++ b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
-define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) {
+define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) nounwind {
%tmp93 = load i16* null ; <i16> [#uses=1]
%tmp99 = call i16 @llvm.bswap.i16( i16 %tmp93 ) ; <i16> [#uses=1]
store i16 %tmp99, i16* %ui16
diff --git a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
index 1043e45..a947e5c 100644
--- a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
+++ b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsldoi
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vor
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsldoi
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vor
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) {
%tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
index aff4ede..cb76b5c 100644
--- a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
+++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
%struct..0anon = type { i32 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
diff --git a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
index 5210dd1..f748a8b 100644
--- a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
+++ b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
target datalayout = "E-p:64:64"
target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
index 7a65c00..57ed250 100644
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -combiner-alias-analysis | grep f5
+; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
index 6621cec..002a064 100644
--- a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep IMPLICIT_DEF
+; RUN: llc < %s -march=ppc32 | not grep IMPLICIT_DEF
define void @foo(i64 %X) {
entry:
diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
index 313568c..3d462b4 100644
--- a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep xor
+; RUN: llc < %s -march=ppc32 | grep xor
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
index 6dc1ff0..3284f0a 100644
--- a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define i32* @foo(i32 %n) {
%A = alloca i32, i32 %n ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
index 80ef479..49b3b9d 100644
--- a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
+++ b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
+; RUN: llc < %s -march=ppc32 | grep rlwimi
define void @test(i16 %div.0.i.i.i.i, i32 %L_num.0.i.i.i.i, i32 %tmp1.i.i206.i.i, i16* %P) {
%X = shl i16 %div.0.i.i.i.i, 1 ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
index 7680c21..61b9967 100644
--- a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
+++ b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
define void @glgRunProcessor15() {
%tmp26355.i = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000 >, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
index be3b863..ba86304 100644
--- a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc64
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s
define void @bitap() {
entry:
diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
index 058166f..6d9a3fa 100644
--- a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc64
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s
@qsz.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
index 19fedf9..805528c 100644
--- a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
+++ b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsb
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh
+; RUN: llc < %s -march=ppc32 | grep extsb
+; RUN: llc < %s -march=ppc32 | grep extsh
define i32 @p1(i8 %c, i16 %s) {
entry:
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index d9374ed..7b00ac6 100644
--- a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep cntlzw
-define i32 @foo() {
+define i32 @foo() nounwind {
entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=2]
%temp = alloca i32, align 4 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
index f2c951e..0c45472 100644
--- a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
+++ b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
define i16 @test(i8* %d1, i16* %d2) {
%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 ) ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
index d476462..fe5145d 100644
--- a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
+++ b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
; Test two things: 1) that a frameidx can be rewritten in an inline asm
; 2) that inline asms can handle reg+imm addr modes.
diff --git a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
index 97f6a01..621d43b 100644
--- a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
+++ b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
; RUN: grep align.*3
@X = global <{i32, i32}> <{ i32 1, i32 123 }>
diff --git a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
index 5a3d3b5..f48f365 100644
--- a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
+++ b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
index 3eef9c5..0473857 100644
--- a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
+++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep mflr | count 1
+; RUN: llc < %s | grep mflr | count 1
target datalayout = "e-p:32:32"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index 098e748..e93395a 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -mcpu=g5 | grep cntlzd
+; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd
define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
%tmp19 = load i64* %t
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index 637208b..d43916d 100644
--- a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
define void @test(<4 x float>*, { { i16, i16, i32 } }*) {
xOperationInitMasks.exit:
diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
index 656b831..86fd947 100644
--- a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
+++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
; PR1351
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index ba0f8fe..f2fdedf 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc | grep {subfc r3,r5,r4}
-; RUN: llvm-as < %s | llc | grep {subfze r4,r2}
-; RUN: llvm-as < %s | llc -regalloc=local | grep {subfc r5,r2,r4}
-; RUN: llvm-as < %s | llc -regalloc=local | grep {subfze r2,r3}
+; RUN: llc < %s | grep {subfc r3,r5,r4}
+; RUN: llc < %s | grep {subfze r4,r2}
+; RUN: llc < %s -regalloc=local | grep {subfc r5,r2,r4}
+; RUN: llc < %s -regalloc=local | grep {subfze r2,r3}
; The first argument of subfc must not be the same as any other register.
; PR1357
diff --git a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
index 989a751..1df5140 100644
--- a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
+++ b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1382
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
index b64de68..e4e9314 100644
--- a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
%struct..0anon = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 5a86418..42f2152 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*quux | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
+; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 2
+; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 2
+; RUN: llc < %s -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
; Check that tail merging is not the default on ppc, and that -enable-tail-merge works.
; ModuleID = 'tail.c'
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
index ae853f6..2938c70 100644
--- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -1,7 +1,7 @@
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
-; RUN: llvm-as < %s | llc -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
+; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
; PR1473
define i8 @foo(i16 zeroext %a) zeroext {
diff --git a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
index 58260ec..6de7a09 100644
--- a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
+++ b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+altivec
+; RUN: llc < %s -march=ppc32 -mattr=+altivec
%struct.XATest = type { float, i16, i8, i8 }
%struct.XArrayRange = type { i8, i8, i8, i8 }
diff --git a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
index 34df7bb..06f40d9 100644
--- a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
; PR1596
%struct._obstack_chunk = type { i8* }
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 9c8fa97..82ef2b8 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 | grep dst | count 4
define hidden void @_Z4borkPc(i8* %image) {
entry:
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index c5e7a4d..ea7de98 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep lwzx
+; RUN: llc < %s -march=ppc64 | grep lwzx
%struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] }
%struct.__mutex_t = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index f6bd333..898c470 100644
--- a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc | grep stfd | count 3
-; RUN: llvm-as < %s | llc | grep stfs | count 1
-; RUN: llvm-as < %s | llc | grep lfd | count 2
-; RUN: llvm-as < %s | llc | grep lfs | count 2
+; RUN: llc < %s | grep stfd | count 3
+; RUN: llc < %s | grep stfs | count 1
+; RUN: llc < %s | grep lfd | count 2
+; RUN: llc < %s | grep lfs | count 2
; ModuleID = 'foo.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll b/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
index bb7aba4..d12698b 100644
--- a/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
%struct.TCMalloc_SpinLock = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
index f4b87cf..5cfe54e 100644
--- a/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin
declare void @cxa_atexit_check_1(i8*)
diff --git a/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll b/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
index e71a8fb..c4152b4 100644
--- a/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
+++ b/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; rdar://5538377
%struct.disk_unsigned = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
index bd11b5d..84fadd1 100644
--- a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
+++ b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -mattr=+altivec
+; RUN: llc < %s -march=ppc64 -mattr=+altivec
%struct.inoutprops = type <{ i8, [3 x i8] }>
define void @bork(float* %argA, float* %argB, float* %res, i8 %inoutspec.0) {
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index bca6e5a..ee61478 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
%struct.NSError = type opaque
%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index 80ef6f1..5a07a9b 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
%struct.NSError = type opaque
%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll b/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
index e49d59a..a9f242b 100644
--- a/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
%struct.HDescriptor = type <{ i32, i32 }>
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index a0649e0..439ef14 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
;; Formerly crashed, see PR 1508
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index aca0faa..d1f0285 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s
+; RUN: llc < %s -march=ppc32 -mcpu=g3
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; PR1811
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>*
diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
index 38ae87c..db2ab87 100644
--- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep nop
+; RUN: llc < %s -march=ppc32 | grep nop
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
index 5b9cd1d..791e9e6 100644
--- a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
%struct.Handle = type { %struct.oopDesc** }
%struct.JNI_ArgumentPusher = type { %struct.SignatureIterator, %struct.JavaCallArguments* }
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index 5edf6b7..cfa1b10 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=local
define i32 @bork(i64 %foo, i64 %bar) {
entry:
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index 8101a35..e50fac4 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
declare i8* @bar(i32)
diff --git a/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll b/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
index 919de33..222dde4 100644
--- a/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
+++ b/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
@.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
define fastcc void @ParseContent(i8* %buf, i32 %bufsize) {
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index eaeccc5..9f35b83 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
%struct._cpp_strbuf = type { i8*, i32, i32 }
%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index 061c585..dd425f5 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
define i16 @test(i8* %d1, i16* %d2) {
%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
index 395c986..a8fef05 100644
--- a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define fastcc i8* @page_rec_get_next(i8* %rec) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
index 67c167a..8776d9a 100644
--- a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
%struct..0objc_object = type { %struct.objc_class* }
%struct.NSArray = type { %struct..0objc_object }
diff --git a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
index 0b748d2..8e5bf56 100644
--- a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
define i32 @t(i64 %byteStart, i32 %activeIndex) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll b/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
index 410736d..2706337 100644
--- a/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
define fastcc i64 @nonzero_bits1() nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
index 357ab10..839098e 100644
--- a/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
; Avoid reading memory that's already freed.
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64)* @_Z13GetSectorSizey to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index a390e52..7b6d491 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
@_ZL10DeviceCode = internal global i16 0 ; <i16*> [#uses=1]
@.str19 = internal constant [64 x i8] c"unlock_then_erase_sector: failed to erase block (status= 0x%x)\0A\00" ; <[64 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll b/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
index 5c40b9e..d42c814 100644
--- a/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
+++ b/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
target triple = "powerpc-apple-darwin9.2.2"
define i256 @func(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind readnone {
diff --git a/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll b/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
index d337e37..6b40b24 100644
--- a/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define void @t() nounwind {
call void null( ppc_fp128 undef )
diff --git a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
index 92b5ca2..862559b 100644
--- a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
+++ b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
@g = external global ppc_fp128
@h = external global ppc_fp128
diff --git a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
index d3238d2..83c5511 100644
--- a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; <rdar://problem/6020042>
define i32 @bork() nounwind {
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index b6b9c89..8802b97 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vadduhm
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsubuhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
define <4 x i32> @test() nounwind {
ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
index 7060fe5..4a834f9 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
%struct.BiPartSrcDescriptor = type <{ %"struct.BiPartSrcDescriptor::$_105" }>
diff --git a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
index f55ffac..17737d9 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
index 32e3642..5cd8c34 100644
--- a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
index a7f8181..dc1e936 100644
--- a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
+++ b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
index 2ccca25..c9c05e1 100644
--- a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
+++ b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin | grep lwz | grep 228
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep lwz | grep 228
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index b625ceb..97844dd 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
%struct.CGLDI = type { %struct.cgli*, i32, i32, i32, i32, i32, i8*, i32, void (%struct.CGLSI*, i32, %struct.CGLDI*)*, i8*, %struct.vv_t }
%struct.cgli = type { i32, %struct.cgli*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, i32, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i8*, i32*, %struct._cgro*, %struct._cgro*, float, float, float, float, i32, i8*, float, i8*, [16 x i32] }
diff --git a/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll b/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
index 00ca811..91c36ef 100644
--- a/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
+++ b/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; XFAIL: *
; PR2356
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
diff --git a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index c760b41..f474a6d 100644
--- a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 071c788..f4c06fb 100644
--- a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -o - | not grep fixunstfsi
+; RUN: llc < %s -march=ppc32 -o - | not grep fixunstfsi
define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
index af9a54e..83f3f6f 100644
--- a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
+++ b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; PR2986
@argc = external global i32 ; <i32*> [#uses=1]
@buffer = external global [32 x i8], align 4 ; <[32 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
index 0ad5b06..20683b9 100644
--- a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
+++ b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2988
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin10.0"
diff --git a/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll b/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
index f5b3e93..9ed7f6f 100644
--- a/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
+++ b/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9.5
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9.5
define void @__multc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/2008-12-12-EH.ll b/test/CodeGen/PowerPC/2008-12-12-EH.ll
index 21218f5..b56c22a 100644
--- a/test/CodeGen/PowerPC/2008-12-12-EH.ll
+++ b/test/CodeGen/PowerPC/2008-12-12-EH.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | grep ^.L_Z1fv.eh
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
define void @_Z1fv() {
entry:
diff --git a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
index 0cf5518..d49d58d 100644
--- a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
+++ b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9.5
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9.5
; rdar://6499616
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
index a898de0..172531e 100644
--- a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
+++ b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin10
+; RUN: llc < %s -mtriple=powerpc-apple-darwin10
; rdar://6692215
define fastcc void @_qsort(i8* %a, i32 %n, i32 %es, i32 (i8*, i8*)* %cmp, i32 %depth_limit) nounwind optsize ssp {
diff --git a/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll b/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
index 4ea43ec..29d115d 100644
--- a/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
+++ b/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin10
+; RUN: llc < %s -mtriple=powerpc-apple-darwin10
; PR4280
define i32 @__fixunssfsi(float %a) nounwind readnone {
diff --git a/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll b/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
new file mode 100644
index 0000000..f64e3dc
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc32 -verify-machineinstrs
+
+; Machine code verifier will call isRegTiedToDefOperand() on /all/ register use
+; operands. We must make sure that the operand flag is found correctly.
+
+; This test case is actually not specific to PowerPC, but the (imm, reg) format
+; of PowerPC "m" operands triggers this bug.
+
+define void @memory_asm_operand(i32 %a) {
+ ; "m" operand will be represented as:
+ ; INLINEASM <es:fake $0>, 10, %R2, 20, -4, %R1
+ ; It is difficult to find the flag operand (20) when starting from %R1
+ call i32 asm "lbzx $0, $1", "=r,m" (i32 %a)
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
new file mode 100644
index 0000000..5d09696
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin10.0"
+; It is wrong on powerpc to substitute reg+reg for $0; the stw opcode
+; would have to change.
+
+@x = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
+
+define void @foo(i32 %y) nounwind ssp {
+entry:
+; CHECK: foo
+; CHECK: add r2
+; CHECK: 0(r2)
+ %y_addr = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %y, i32* %y_addr
+ %0 = load i32* %y_addr, align 4 ; <i32> [#uses=1]
+ %1 = getelementptr inbounds [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1]
+ call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll b/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000..12c4c99
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=ppc32 -mtriple=ppc-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/PowerPC/2009-09-18-carrybit.ll b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
new file mode 100644
index 0000000..6c23a61
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
@@ -0,0 +1,62 @@
+; RUN: llc -march=ppc32 < %s | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin9.6"
+
+define i64 @foo(i64 %r.0.ph, i64 %q.0.ph, i32 %sr1.1.ph) nounwind {
+entry:
+; CHECK: foo:
+; CHECK: subfc
+; CHECK: subfe
+; CHECK: subfc
+; CHECK: subfe
+ %tmp0 = add i64 %r.0.ph, -1 ; <i64> [#uses=1]
+ br label %bb40
+
+bb40: ; preds = %bb40, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb40 ] ; <i32> [#uses=1]
+ %carry.0274 = phi i32 [ 0, %entry ], [%tmp122, %bb40 ] ; <i32> [#uses=1]
+ %r.0273 = phi i64 [ %r.0.ph, %entry ], [ %tmp124, %bb40 ] ; <i64> [#uses=2]
+ %q.0272 = phi i64 [ %q.0.ph, %entry ], [ %ins169, %bb40 ] ; <i64> [#uses=3]
+ %tmp1 = lshr i64 %r.0273, 31 ; <i64> [#uses=1]
+ %tmp2 = trunc i64 %tmp1 to i32 ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp2, -2 ; <i32> [#uses=1]
+ %tmp213 = trunc i64 %r.0273 to i32 ; <i32> [#uses=2]
+ %tmp106 = lshr i32 %tmp213, 31 ; <i32> [#uses=1]
+ %tmp107 = or i32 %tmp3, %tmp106 ; <i32> [#uses=1]
+ %tmp215 = zext i32 %tmp107 to i64 ; <i64> [#uses=1]
+ %tmp216 = shl i64 %tmp215, 32 ; <i64> [#uses=1]
+ %tmp108 = shl i32 %tmp213, 1 ; <i32> [#uses=1]
+ %tmp109 = lshr i64 %q.0272, 63 ; <i64> [#uses=1]
+ %tmp110 = trunc i64 %tmp109 to i32 ; <i32> [#uses=1]
+ %tmp111 = or i32 %tmp108, %tmp110 ; <i32> [#uses=1]
+ %tmp222 = zext i32 %tmp111 to i64 ; <i64> [#uses=1]
+ %ins224 = or i64 %tmp216, %tmp222 ; <i64> [#uses=2]
+ %tmp112 = lshr i64 %q.0272, 31 ; <i64> [#uses=1]
+ %tmp113 = trunc i64 %tmp112 to i32 ; <i32> [#uses=1]
+ %tmp114 = and i32 %tmp113, -2 ; <i32> [#uses=1]
+ %tmp158 = trunc i64 %q.0272 to i32 ; <i32> [#uses=2]
+ %tmp115 = lshr i32 %tmp158, 31 ; <i32> [#uses=1]
+ %tmp116 = or i32 %tmp114, %tmp115 ; <i32> [#uses=1]
+ %tmp160 = zext i32 %tmp116 to i64 ; <i64> [#uses=1]
+ %tmp161 = shl i64 %tmp160, 32 ; <i64> [#uses=1]
+ %tmp117 = shl i32 %tmp158, 1 ; <i32> [#uses=1]
+ %tmp118 = or i32 %tmp117, %carry.0274 ; <i32> [#uses=1]
+ %tmp167 = zext i32 %tmp118 to i64 ; <i64> [#uses=1]
+ %ins169 = or i64 %tmp161, %tmp167 ; <i64> [#uses=2]
+ %tmp119 = sub i64 %tmp0, %ins224 ; <i64> [#uses=1]
+ %tmp120 = ashr i64 %tmp119, 63 ; <i64> [#uses=2]
+ %tmp121 = trunc i64 %tmp120 to i32 ; <i32> [#uses=1]
+ %tmp122 = and i32 %tmp121, 1 ; <i32> [#uses=2]
+ %tmp123 = and i64 %tmp120, %q.0.ph ; <i64> [#uses=1]
+ %tmp124 = sub i64 %ins224, %tmp123 ; <i64> [#uses=2]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %sr1.1.ph ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb41.bb42_crit_edge, label %bb40
+
+bb41.bb42_crit_edge: ; preds = %bb40
+ %phitmp278 = zext i32 %tmp122 to i64 ; <i64> [#uses=1]
+ %tmp125 = shl i64 %ins169, 1 ; <i64> [#uses=1]
+ %tmp126 = or i64 %phitmp278, %tmp125 ; <i64> [#uses=2]
+ ret i64 %tmp126
+}
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
index f3246fd..03905a3 100644
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ b/test/CodeGen/PowerPC/Atomics-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; ModuleID = 'Atomics.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll
index c3de710..1dc4310 100644
--- a/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/test/CodeGen/PowerPC/Atomics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
; ModuleID = 'Atomics.c'
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 45c13a7..25fc626 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -1,35 +1,28 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | \
-; RUN: grep {stwu r1, -80(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | \
-; RUN: grep {stwu r1, -80(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stdu r1, -112(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stdu r1, -112(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r31, 40(r1)}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+
+; PPC32: stw r31, 20(r1)
+; PPC32: lwz r1, 0(r1)
+; PPC32: lwz r31, 20(r1)
+; PPC32-NOFP: stw r31, 20(r1)
+; PPC32-NOFP: lwz r1, 0(r1)
+; PPC32-NOFP: lwz r31, 20(r1)
+; PPC32-RS: stwu r1, -80(r1)
+; PPC32-RS-NOFP: stwu r1, -80(r1)
+
+; PPC64: std r31, 40(r1)
+; PPC64: stdu r1, -112(r1)
+; PPC64: ld r1, 0(r1)
+; PPC64: ld r31, 40(r1)
+; PPC64-NOFP: std r31, 40(r1)
+; PPC64-NOFP: stdu r1, -112(r1)
+; PPC64-NOFP: ld r1, 0(r1)
+; PPC64-NOFP: ld r31, 40(r1)
define i32* @f1(i32 %n) {
%tmp = alloca i32, i32 %n ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 0a15d22..fda2e4f 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -1,77 +1,52 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ori r0, r0, 32704}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stwux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ori r0, r0, 32704}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stwux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ori r0, r0, 32656}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {stdux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN: not grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ori r0, r0, 32656}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {stdux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN: grep {ld r31, 40(r1)}
-define i32* @f1() {
+; RUN: llvm-as < %s > %t.bc
+; RUN: llc < %t.bc -march=ppc32 | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %t.bc -march=ppc32 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-FP
+; RUN: llc < %t.bc -march=ppc64 | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %t.bc -march=ppc64 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-FP
+
+
+target triple = "powerpc-apple-darwin8"
+
+define i32* @f1() nounwind {
%tmp = alloca i32, i32 8191 ; <i32*> [#uses=1]
ret i32* %tmp
}
+; PPC32-NOFP: _f1:
+; PPC32-NOFP: lis r0, -1
+; PPC32-NOFP: ori r0, r0, 32704
+; PPC32-NOFP: stwux r1, r1, r0
+; PPC32-NOFP: addi r3, r1, 68
+; PPC32-NOFP: lwz r1, 0(r1)
+; PPC32-NOFP: blr
+
+; PPC32-FP: _f1:
+; PPC32-FP: stw r31, 20(r1)
+; PPC32-FP: lis r0, -1
+; PPC32-FP: ori r0, r0, 32704
+; PPC32-FP: stwux r1, r1, r0
+; ...
+; PPC32-FP: lwz r1, 0(r1)
+; PPC32-FP: lwz r31, 20(r1)
+; PPC32-FP: blr
+
+
+; PPC64-NOFP: _f1:
+; PPC64-NOFP: lis r0, -1
+; PPC64-NOFP: ori r0, r0, 32656
+; PPC64-NOFP: stdux r1, r1, r0
+; PPC64-NOFP: addi r3, r1, 116
+; PPC64-NOFP: ld r1, 0(r1)
+; PPC64-NOFP: blr
+
+
+; PPC64-FP: _f1:
+; PPC64-FP: std r31, 40(r1)
+; PPC64-FP: lis r0, -1
+; PPC64-FP: ori r0, r0, 32656
+; PPC64-FP: stdux r1, r1, r0
+; ...
+; PPC64-FP: ld r1, 0(r1)
+; PPC64-FP: ld r31, 40(r1)
+; PPC64-FP: blr
diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll
index 11b64703..c2e1d6b 100644
--- a/test/CodeGen/PowerPC/Frames-leaf.ll
+++ b/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -1,34 +1,34 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {stwu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {stwu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {stdu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {stw r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {stdu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
; RUN: not grep {ld r31, 40(r1)}
define i32* @f1() {
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index 4ea3afb..6875704 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,26 +1,22 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
; RUN: not grep {stw r31, 20(r1)} %t1
; RUN: grep {stwu r1, -16448(r1)} %t1
; RUN: grep {addi r1, r1, 16448} %t1
-; RUN: llvm-as < %s | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
-; RUN: -o %t2 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN: -o %t2
; RUN: grep {stw r31, 20(r1)} %t2
; RUN: grep {stwu r1, -16448(r1)} %t2
; RUN: grep {addi r1, r1, 16448} %t2
; RUN: grep {lwz r31, 20(r1)} %t2
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -f
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
; RUN: not grep {std r31, 40(r1)} %t3
; RUN: grep {stdu r1, -16496(r1)} %t3
; RUN: grep {addi r1, r1, 16496} %t3
; RUN: not grep {ld r31, 40(r1)} %t3
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
-; RUN: -o %t4 -f
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN: -o %t4
; RUN: grep {std r31, 40(r1)} %t4
; RUN: grep {stdu r1, -16496(r1)} %t4
; RUN: grep {addi r1, r1, 16496} %t4
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 1705379..0f7acac 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
; RUN: grep {stw r3, 32751}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
; RUN: grep {stw r3, 32751}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
; RUN: grep {std r2, 9024}
define void @test() {
diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll
index 406053b..09a7fbd 100644
--- a/test/CodeGen/PowerPC/addc.ll
+++ b/test/CodeGen/PowerPC/addc.ll
@@ -1,5 +1,5 @@
; All of these should be codegen'd without loading immediates
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep addc %t | count 1
; RUN: grep adde %t | count 1
; RUN: grep addze %t | count 1
diff --git a/test/CodeGen/PowerPC/addi-reassoc.ll b/test/CodeGen/PowerPC/addi-reassoc.ll
index bee8660..2b71ce6 100644
--- a/test/CodeGen/PowerPC/addi-reassoc.ll
+++ b/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep addi
+; RUN: llc < %s -march=ppc32 | not grep addi
%struct.X = type { [5 x i8] }
diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll
index 7ffbe36..e619faa 100644
--- a/test/CodeGen/PowerPC/align.ll
+++ b/test/CodeGen/PowerPC/align.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep align.4 | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep align.2 | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep align.3 | count 1
@A = global <4 x i32> < i32 10, i32 20, i32 30, i32 40 > ; <<4 x i32>*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll
index f0bb5ea..0484f88 100644
--- a/test/CodeGen/PowerPC/and-branch.ll
+++ b/test/CodeGen/PowerPC/and-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mfcr
+; RUN: llc < %s -march=ppc32 | not grep mfcr
define void @foo(i32 %X, i32 %Y, i32 %Z) {
entry:
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll
index eef8f51..3685361 100644
--- a/test/CodeGen/PowerPC/and-elim.ll
+++ b/test/CodeGen/PowerPC/and-elim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwin
+; RUN: llc < %s -march=ppc32 | not grep rlwin
define void @test(i8* %P) {
%W = load i8* %P
diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll
index 9c80649..64a45e5 100644
--- a/test/CodeGen/PowerPC/and-imm.ll
+++ b/test/CodeGen/PowerPC/and-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {ori\\|lis}
+; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis}
; andi. r3, r3, 32769
define i32 @test(i32 %X) {
diff --git a/test/CodeGen/PowerPC/and_add.ll b/test/CodeGen/PowerPC/and_add.ll
index f103e7c..517e775 100644
--- a/test/CodeGen/PowerPC/and_add.ll
+++ b/test/CodeGen/PowerPC/and_add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep slwi %t
; RUN: not grep addi %t
; RUN: not grep rlwinm %t
diff --git a/test/CodeGen/PowerPC/and_sext.ll b/test/CodeGen/PowerPC/and_sext.ll
index e0e498d..c6d234e 100644
--- a/test/CodeGen/PowerPC/and_sext.ll
+++ b/test/CodeGen/PowerPC/and_sext.ll
@@ -1,6 +1,6 @@
; These tests should not contain a sign extend.
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsh
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
+; RUN: llc < %s -march=ppc32 | not grep extsh
+; RUN: llc < %s -march=ppc32 | not grep extsb
define i32 @test1(i32 %mode.0.i.0) {
%tmp.79 = trunc i32 %mode.0.i.0 to i16
diff --git a/test/CodeGen/PowerPC/and_sra.ll b/test/CodeGen/PowerPC/and_sra.ll
index c780605..e6c02d8 100644
--- a/test/CodeGen/PowerPC/and_sra.ll
+++ b/test/CodeGen/PowerPC/and_sra.ll
@@ -1,5 +1,5 @@
; Neither of these functions should contain algebraic right shifts
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi
+; RUN: llc < %s -march=ppc32 | not grep srawi
define i32 @test1(i32 %mode.0.i.0) {
%tmp.79 = bitcast i32 %mode.0.i.0 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index f6bb298..ec4e42d 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lwarx | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | grep stwcx. | count 4
+; RUN: llc < %s -march=ppc32 | grep lwarx | count 3
+; RUN: llc < %s -march=ppc32 | grep stwcx. | count 4
define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
%tmp = call i32 @llvm.atomic.load.add.i32( i32* %mem, i32 %val )
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 77b7b08..6d9daef9 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep ldarx | count 3
-; RUN: llvm-as < %s | llc -march=ppc64 | grep stdcx. | count 4
+; RUN: llc < %s -march=ppc64 | grep ldarx | count 3
+; RUN: llc < %s -march=ppc64 | grep stdcx. | count 4
define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
%tmp = call i64 @llvm.atomic.load.add.i64( i64* %mem, i64 %val )
diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll
index cfad6ea..fdead7d 100644
--- a/test/CodeGen/PowerPC/available-externally.ll
+++ b/test/CodeGen/PowerPC/available-externally.ll
@@ -1,69 +1,71 @@
-; RUN: llvm-as < %s | llc | grep {bl L_exact_log2.stub}
+; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
; PR4482
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "powerpc-apple-darwin8"
define i32 @foo(i64 %x) nounwind {
entry:
- %x_addr = alloca i64 ; <i64*> [#uses=2]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i64 %x, i64* %x_addr
- %1 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %2 = call i32 @exact_log2(i64 %1) nounwind ; <i32> [#uses=1]
- store i32 %2, i32* %0, align 4
- %3 = load i32* %0, align 4 ; <i32> [#uses=1]
- store i32 %3, i32* %retval, align 4
- br label %return
-
-return: ; preds = %entry
- %retval1 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval1
+; STATIC: _foo:
+; STATIC: bl _exact_log2
+; STATIC: blr
+; STATIC: .subsections_via_symbols
+
+; PIC: _foo:
+; PIC: bl L_exact_log2$stub
+; PIC: blr
+
+; DYNAMIC: _foo:
+; DYNAMIC: bl L_exact_log2$stub
+; DYNAMIC: blr
+
+ %A = call i32 @exact_log2(i64 %x) nounwind
+ ret i32 %A
}
define available_externally i32 @exact_log2(i64 %x) nounwind {
entry:
- %x_addr = alloca i64 ; <i64*> [#uses=6]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %iftmp.0 = alloca i32 ; <i32*> [#uses=3]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i64 %x, i64* %x_addr
- %1 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %2 = sub i64 0, %1 ; <i64> [#uses=1]
- %3 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %4 = and i64 %2, %3 ; <i64> [#uses=1]
- %5 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %6 = icmp ne i64 %4, %5 ; <i1> [#uses=1]
- br i1 %6, label %bb2, label %bb
-
-bb: ; preds = %entry
- %7 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %8 = icmp eq i64 %7, 0 ; <i1> [#uses=1]
- br i1 %8, label %bb2, label %bb1
-
-bb1: ; preds = %bb
- %9 = load i64* %x_addr, align 8 ; <i64> [#uses=1]
- %10 = call i64 @llvm.cttz.i64(i64 %9) ; <i64> [#uses=1]
- %11 = trunc i64 %10 to i32 ; <i32> [#uses=1]
- store i32 %11, i32* %iftmp.0, align 4
- br label %bb3
-
-bb2: ; preds = %bb, %entry
- store i32 -1, i32* %iftmp.0, align 4
- br label %bb3
-
-bb3: ; preds = %bb2, %bb1
- %12 = load i32* %iftmp.0, align 4 ; <i32> [#uses=1]
- store i32 %12, i32* %0, align 4
- %13 = load i32* %0, align 4 ; <i32> [#uses=1]
- store i32 %13, i32* %retval, align 4
- br label %return
-
-return: ; preds = %bb3
- %retval4 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval4
+ ret i32 42
}
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+; PIC: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+; PIC: L_exact_log2$stub:
+; PIC: .indirect_symbol _exact_log2
+; PIC: mflr r0
+; PIC: bcl 20,31,L_exact_log2$stub$tmp
+
+; PIC: L_exact_log2$stub$tmp:
+; PIC: mflr r11
+; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC: mtlr r0
+; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC: mtctr r12
+; PIC: bctr
+
+; PIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; PIC: L_exact_log2$lazy_ptr:
+; PIC: .indirect_symbol _exact_log2
+; PIC: .long dyld_stub_binding_helper
+
+; PIC: .subsections_via_symbols
+
+
+; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+; DYNAMIC: L_exact_log2$stub:
+; DYNAMIC: .indirect_symbol _exact_log2
+; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC: mtctr r12
+; DYNAMIC: bctr
+
+; DYNAMIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; DYNAMIC: L_exact_log2$lazy_ptr:
+; DYNAMIC: .indirect_symbol _exact_log2
+; DYNAMIC: .long dyld_stub_binding_helper
+
+
+
+
+
diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll
index d239357..009f468 100644
--- a/test/CodeGen/PowerPC/big-endian-actual-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {addc 4, 4, 6}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {adde 3, 3, 5}
define i64 @foo(i64 %x, i64 %y) {
diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll
index ab136f6..fe85404 100644
--- a/test/CodeGen/PowerPC/big-endian-call-result.ll
+++ b/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {addic 4, 4, 1}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {addze 3, 3}
declare i64 @foo()
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 08589f4..e46e1ec 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {li 6, 3}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {li 4, 2}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {li 3, 0}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
; RUN: grep {mr 5, 3}
declare void @bar(i64 %x, i64 %y)
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index 4aa55a3..cc02e40 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {b LBB.*} | count 4
target datalayout = "E-p:32:32"
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index e450eb8..7eb3bbb 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc32 | not grep rlwinm
+; RUN: llc < %s -march=ppc32 | not grep rlwimi
+; RUN: llc < %s -march=ppc64 | \
; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4
-; RUN: llvm-as < %s | llc -march=ppc64 | not grep rlwinm
-; RUN: llvm-as < %s | llc -march=ppc64 | not grep rlwimi
+; RUN: llc < %s -march=ppc64 | not grep rlwinm
+; RUN: llc < %s -march=ppc64 | not grep rlwimi
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
%tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 20ff3db..0454c58 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,11 +1,9 @@
; There should be exactly one vxor here.
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
; RUN: grep vxor | count 1
; There should be exactly one vsplti here.
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
; RUN: grep vsplti | count 1
define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
index 034c141..0db184f 100644
--- a/test/CodeGen/PowerPC/calls.ll
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -1,10 +1,10 @@
; Test various forms of calls.
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bl } | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bctrl} | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bla } | count 1
declare void @foo()
diff --git a/test/CodeGen/PowerPC/cmp-cmp.ll b/test/CodeGen/PowerPC/cmp-cmp.ll
index 07964d5..35a5e42 100644
--- a/test/CodeGen/PowerPC/cmp-cmp.ll
+++ b/test/CodeGen/PowerPC/cmp-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mfcr
+; RUN: llc < %s -march=ppc32 | not grep mfcr
define void @test(i64 %X) {
%tmp1 = and i64 %X, 3 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/compare-duplicate.ll b/test/CodeGen/PowerPC/compare-duplicate.ll
index df2dfdc..f5108c3 100644
--- a/test/CodeGen/PowerPC/compare-duplicate.ll
+++ b/test/CodeGen/PowerPC/compare-duplicate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep slwi
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep slwi
define i32 @test(i32 %A, i32 %B) {
%C = sub i32 %B, %A
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index b0ef2d3..5ba0500 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {cmpwi cr0, r3, -1}
define i32 @test(i32 %x) {
diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll
index b58f59a..8901e02 100644
--- a/test/CodeGen/PowerPC/constants.ll
+++ b/test/CodeGen/PowerPC/constants.ll
@@ -1,9 +1,9 @@
; All of these routines should perform optimal loads of constants.
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep lis | count 5
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep ori | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep {li } | count 4
define i32 @f1() {
diff --git a/test/CodeGen/PowerPC/cr_spilling.ll b/test/CodeGen/PowerPC/cr_spilling.ll
index 4584c71..b215868 100644
--- a/test/CodeGen/PowerPC/cr_spilling.ll
+++ b/test/CodeGen/PowerPC/cr_spilling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
+; RUN: llc < %s -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
; PR1638
@.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 2c51e8a..ab493a0 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,5 +1,5 @@
; Make sure this testcase does not use ctpop
-; RUN: llvm-as < %s | llc -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 | grep -i cntlzw
declare i32 @llvm.cttz.i32(i32)
diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll
index ceebc70..af23369 100644
--- a/test/CodeGen/PowerPC/darwin-labels.ll
+++ b/test/CodeGen/PowerPC/darwin-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {foo bar":}
+; RUN: llc < %s | grep {foo bar":}
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/delete-node.ll b/test/CodeGen/PowerPC/delete-node.ll
index 0b1d734..a26c211 100644
--- a/test/CodeGen/PowerPC/delete-node.ll
+++ b/test/CodeGen/PowerPC/delete-node.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; The DAGCombiner leaves behind a dead node in this testcase. Currently
; ISel is ignoring dead nodes, though it would be preferable for
diff --git a/test/CodeGen/PowerPC/div-2.ll b/test/CodeGen/PowerPC/div-2.ll
index 26e6221..2fc916f 100644
--- a/test/CodeGen/PowerPC/div-2.ll
+++ b/test/CodeGen/PowerPC/div-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi
-; RUN: llvm-as < %s | llc -march=ppc32 | grep blr
+; RUN: llc < %s -march=ppc32 | not grep srawi
+; RUN: llc < %s -march=ppc32 | grep blr
define i32 @test1(i32 %X) {
%Y = and i32 %X, 15 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index 7be8a34..558fd1b 100644
--- a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep eqv | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep andc | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep orc | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep nor | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep nand | count 1
define i32 @EQV1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/PowerPC/extsh.ll b/test/CodeGen/PowerPC/extsh.ll
index 5eca8ce..506ff86 100644
--- a/test/CodeGen/PowerPC/extsh.ll
+++ b/test/CodeGen/PowerPC/extsh.ll
@@ -1,5 +1,5 @@
; This should turn into a single extsh
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh | count 1
+; RUN: llc < %s -march=ppc32 | grep extsh | count 1
define i32 @test(i32 %X) {
%tmp.81 = shl i32 %X, 16 ; <i32> [#uses=1]
%tmp.82 = ashr i32 %tmp.81, 16 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll
index 54e49b0..6ef740f 100644
--- a/test/CodeGen/PowerPC/fabs.ll
+++ b/test/CodeGen/PowerPC/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
define double @fabs(double %f) {
entry:
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 4a6fe70..815c72c 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: egrep {fn?madd|fn?msub} | count 8
define double @test_FMADD1(double %A, double %B, double %C) {
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index 6c10dfb..bbd5c71 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fnabs
+; RUN: llc < %s -march=ppc32 | grep fnabs
declare double @fabs(double)
diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll
index 9579a74..0bd31bb 100644
--- a/test/CodeGen/PowerPC/fneg.ll
+++ b/test/CodeGen/PowerPC/fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+; RUN: llc < %s -march=ppc32 | not grep fneg
define double @test1(double %a, double %b, double %c, double %d) {
entry:
diff --git a/test/CodeGen/PowerPC/fold-li.ll b/test/CodeGen/PowerPC/fold-li.ll
index 2ac79f1..92d8da5 100644
--- a/test/CodeGen/PowerPC/fold-li.ll
+++ b/test/CodeGen/PowerPC/fold-li.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: grep -v align | not grep li
;; Test that immediates are folded into these instructions correctly.
diff --git a/test/CodeGen/PowerPC/fp-branch.ll b/test/CodeGen/PowerPC/fp-branch.ll
index 3db6ced..673da02 100644
--- a/test/CodeGen/PowerPC/fp-branch.ll
+++ b/test/CodeGen/PowerPC/fp-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fcmp | count 1
+; RUN: llc < %s -march=ppc32 | grep fcmp | count 1
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/PowerPC/fp-int-fp.ll b/test/CodeGen/PowerPC/fp-int-fp.ll
index 1b78b01..18f7f83 100644
--- a/test/CodeGen/PowerPC/fp-int-fp.ll
+++ b/test/CodeGen/PowerPC/fp-int-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep r1
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep r1
define double @test1(double %X) {
%Y = fptosi double %X to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fp_to_uint.ll b/test/CodeGen/PowerPC/fp_to_uint.ll
index 43502bb..1360b62 100644
--- a/test/CodeGen/PowerPC/fp_to_uint.ll
+++ b/test/CodeGen/PowerPC/fp_to_uint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fctiwz | count 1
+; RUN: llc < %s -march=ppc32 | grep fctiwz | count 1
define i16 @foo(float %a) {
entry:
diff --git a/test/CodeGen/PowerPC/fpcopy.ll b/test/CodeGen/PowerPC/fpcopy.ll
index 7d80596..7b9446b 100644
--- a/test/CodeGen/PowerPC/fpcopy.ll
+++ b/test/CodeGen/PowerPC/fpcopy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fmr
+; RUN: llc < %s -march=ppc32 | not grep fmr
define double @test(float %F) {
%F.upgrd.1 = fpext float %F to double ; <double> [#uses=1]
diff --git a/test/CodeGen/PowerPC/frounds.ll b/test/CodeGen/PowerPC/frounds.ll
index 0d8e621..8eeadc3 100644
--- a/test/CodeGen/PowerPC/frounds.ll
+++ b/test/CodeGen/PowerPC/frounds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define i32 @foo() {
entry:
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
index 1260c60..74a8725 100644
--- a/test/CodeGen/PowerPC/fsqrt.ll
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -1,17 +1,13 @@
; fsqrt should be generated when the fsqrt feature is enabled, but not
; otherwise.
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
; RUN: grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN: grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: grep {fsqrt f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
; RUN: not grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
; RUN: not grep {fsqrt f1, f1}
declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/PowerPC/hello.ll b/test/CodeGen/PowerPC/hello.ll
index 1d7275f..ea27e92 100644
--- a/test/CodeGen/PowerPC/hello.ll
+++ b/test/CodeGen/PowerPC/hello.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
; PR1399
@.str = internal constant [13 x i8] c"Hello World!\00"
diff --git a/test/CodeGen/PowerPC/hidden-vis-2.ll b/test/CodeGen/PowerPC/hidden-vis-2.ll
index 4c9ae55..e9e2c0a 100644
--- a/test/CodeGen/PowerPC/hidden-vis-2.ll
+++ b/test/CodeGen/PowerPC/hidden-vis-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6
@x = external hidden global i32 ; <i32*> [#uses=1]
@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/hidden-vis.ll b/test/CodeGen/PowerPC/hidden-vis.ll
index e04c89a..b2cc143 100644
--- a/test/CodeGen/PowerPC/hidden-vis.ll
+++ b/test/CodeGen/PowerPC/hidden-vis.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr
@x = weak hidden global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/i128-and-beyond.ll b/test/CodeGen/PowerPC/i128-and-beyond.ll
index 9e0d6c3..51bcab2 100644
--- a/test/CodeGen/PowerPC/i128-and-beyond.ll
+++ b/test/CodeGen/PowerPC/i128-and-beyond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep 4294967295 | count 28
+; RUN: llc < %s -march=ppc32 | grep 4294967295 | count 28
; These static initializers are too big to hand off to assemblers
; as monolithic blobs.
diff --git a/test/CodeGen/PowerPC/i64_fp.ll b/test/CodeGen/PowerPC/i64_fp.ll
index 5ff2684..d53c948 100644
--- a/test/CodeGen/PowerPC/i64_fp.ll
+++ b/test/CodeGen/PowerPC/i64_fp.ll
@@ -1,21 +1,21 @@
; fcfid and fctid should be generated when the 64bit feature is enabled, but not
; otherwise.
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
; RUN: grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
; RUN: grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
; RUN: grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
; RUN: not grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
; RUN: not grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
; RUN: not grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
; RUN: not grep fctidz
define double @X(double %Y) {
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index 677b41b..a43f09c 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: llc < %s -march=ppc32 -stats |& \
; RUN: grep {4 .*Number of machine instrs printed}
;; Integer absolute value, should produce something as good as:
diff --git a/test/CodeGen/PowerPC/illegal-element-type.ll b/test/CodeGen/PowerPC/illegal-element-type.ll
index 54a0665..58bd055 100644
--- a/test/CodeGen/PowerPC/illegal-element-type.ll
+++ b/test/CodeGen/PowerPC/illegal-element-type.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
+; RUN: llc < %s -march=ppc32 -mcpu=g3
define void @foo() {
entry:
diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll
index c0a3979..e1ff82d 100644
--- a/test/CodeGen/PowerPC/inlineasm-copy.ll
+++ b/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mr
+; RUN: llc < %s -march=ppc32 | not grep mr
define i32 @test(i32 %Y, i32 %X) {
entry:
diff --git a/test/CodeGen/PowerPC/int-fp-conv-0.ll b/test/CodeGen/PowerPC/int-fp-conv-0.ll
index 82a1826..983d2b8 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-0.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 > %t
+; RUN: llc < %s -march=ppc64 > %t
; RUN: grep __floattitf %t
; RUN: grep __fixunstfti %t
diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll
index 583408c..6c82723 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-1.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep __floatditf
+; RUN: llc < %s -march=ppc64 | grep __floatditf
define i64 @__fixunstfdi(ppc_fp128 %a) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/invalid-memcpy.ll b/test/CodeGen/PowerPC/invalid-memcpy.ll
index 6df968d..3b1f306 100644
--- a/test/CodeGen/PowerPC/invalid-memcpy.ll
+++ b/test/CodeGen/PowerPC/invalid-memcpy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
; This testcase is invalid (the alignment specified for memcpy is
; greater than the alignment guaranteed for Qux or C.0.1173, but it
diff --git a/test/CodeGen/PowerPC/inverted-bool-compares.ll b/test/CodeGen/PowerPC/inverted-bool-compares.ll
index f8c5f11..aa7e4d6 100644
--- a/test/CodeGen/PowerPC/inverted-bool-compares.ll
+++ b/test/CodeGen/PowerPC/inverted-bool-compares.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
+; RUN: llc < %s -march=ppc32 | not grep xori
define i32 @test(i1 %B, i32* %P) {
br i1 %B, label %T, label %F
diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll
index 192d738..4161e34 100644
--- a/test/CodeGen/PowerPC/ispositive.ll
+++ b/test/CodeGen/PowerPC/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {srwi r3, r3, 31}
define i32 @test1(i32 %X) {
diff --git a/test/CodeGen/PowerPC/itofp128.ll b/test/CodeGen/PowerPC/itofp128.ll
index 4d74511..6d9ef95 100644
--- a/test/CodeGen/PowerPC/itofp128.ll
+++ b/test/CodeGen/PowerPC/itofp128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin9.2.0"
diff --git a/test/CodeGen/PowerPC/lha.ll b/test/CodeGen/PowerPC/lha.ll
index e8f73ee..3a100c1 100644
--- a/test/CodeGen/PowerPC/lha.ll
+++ b/test/CodeGen/PowerPC/lha.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lha
+; RUN: llc < %s -march=ppc32 | grep lha
define i32 @test(i16* %a) {
%tmp.1 = load i16* %a ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/load-constant-addr.ll b/test/CodeGen/PowerPC/load-constant-addr.ll
index d2be04e..f1d061c 100644
--- a/test/CodeGen/PowerPC/load-constant-addr.ll
+++ b/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -1,6 +1,6 @@
; Should fold the ori into the lfs.
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lfs
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep ori
+; RUN: llc < %s -march=ppc32 | grep lfs
+; RUN: llc < %s -march=ppc32 | not grep ori
define float @test() {
%tmp.i = load float* inttoptr (i32 186018016 to float*) ; <float> [#uses=1]
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index 7b90725..94c2526 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep cntlzw
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {li }
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {mr }
+; RUN: llc < %s -march=ppc32 | grep cntlzw
+; RUN: llc < %s -march=ppc32 | not grep xori
+; RUN: llc < %s -march=ppc32 | not grep {li }
+; RUN: llc < %s -march=ppc32 | not grep {mr }
define i1 @test(i64 %x) {
%tmp = icmp ult i64 %x, 4294967296
diff --git a/test/CodeGen/PowerPC/longdbl-truncate.ll b/test/CodeGen/PowerPC/longdbl-truncate.ll
index a873824..e5f63c6 100644
--- a/test/CodeGen/PowerPC/longdbl-truncate.ll
+++ b/test/CodeGen/PowerPC/longdbl-truncate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/mask64.ll b/test/CodeGen/PowerPC/mask64.ll
index 69d2200..139621a 100644
--- a/test/CodeGen/PowerPC/mask64.ll
+++ b/test/CodeGen/PowerPC/mask64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin9.2.0"
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index fd0e1d4..5661ef9 100644
--- a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep li.*16
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep addi
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep li.*16
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep addi
; Codegen lvx (R+16) as t = li 16, lvx t,R
; This shares the 16 between the two loads.
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index a152762..b267719 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc32 -enable-ppc-preinc | \
; RUN: not grep addi
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \
; RUN: not grep addi
@Glob = global i64 4 ; <i64*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/mul-neg-power-2.ll b/test/CodeGen/PowerPC/mul-neg-power-2.ll
index 90446d7..9688d6e 100644
--- a/test/CodeGen/PowerPC/mul-neg-power-2.ll
+++ b/test/CodeGen/PowerPC/mul-neg-power-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mul
+; RUN: llc < %s -march=ppc32 | not grep mul
define i32 @test1(i32 %a) {
%tmp.1 = mul i32 %a, -2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/mul-with-overflow.ll b/test/CodeGen/PowerPC/mul-with-overflow.ll
index 0276846..f03e3cb 100644
--- a/test/CodeGen/PowerPC/mul-with-overflow.ll
+++ b/test/CodeGen/PowerPC/mul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll
index 3b0daad..9ab8d99 100644
--- a/test/CodeGen/PowerPC/mulhs.ll
+++ b/test/CodeGen/PowerPC/mulhs.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: not grep mulhwu %t
; RUN: not grep srawi %t
; RUN: not grep add %t
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
index 3f75f7d..b9317f9 100644
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ b/test/CodeGen/PowerPC/multiple-return-values.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
define {i64, float} @bar(i64 %a, float %b) {
%y = add i64 %a, 7
diff --git a/test/CodeGen/PowerPC/neg.ll b/test/CodeGen/PowerPC/neg.ll
index c135599..c673912 100644
--- a/test/CodeGen/PowerPC/neg.ll
+++ b/test/CodeGen/PowerPC/neg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep neg
+; RUN: llc < %s -march=ppc32 | grep neg
define i32 @test(i32 %X) {
%Y = sub i32 0, %X ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/no-dead-strip.ll b/test/CodeGen/PowerPC/no-dead-strip.ll
index e7ceaae..3459413 100644
--- a/test/CodeGen/PowerPC/no-dead-strip.ll
+++ b/test/CodeGen/PowerPC/no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {no_dead_strip.*_X}
+; RUN: llc < %s | grep {no_dead_strip.*_X}
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/or-addressing-mode.ll b/test/CodeGen/PowerPC/or-addressing-mode.ll
index 9b6e955..e50374e 100644
--- a/test/CodeGen/PowerPC/or-addressing-mode.ll
+++ b/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep ori
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep rlwimi
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep ori
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep rlwimi
define i32 @test1(i8* %P) {
%tmp.2.i = ptrtoint i8* %P to i32 ; <i32> [#uses=2]
diff --git a/test/CodeGen/PowerPC/ppcf128-1-opt.ll b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
index e3c5ab1..2fc1720 100644
--- a/test/CodeGen/PowerPC/ppcf128-1-opt.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
; ModuleID = '<stdin>'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index a487de7..1047fe5 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc > %t
+; RUN: opt < %s -std-compile-opts | llc > %t
; ModuleID = 'ld3.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-2.ll b/test/CodeGen/PowerPC/ppcf128-2.ll
index 4318226..7eee354 100644
--- a/test/CodeGen/PowerPC/ppcf128-2.ll
+++ b/test/CodeGen/PowerPC/ppcf128-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define i64 @__fixtfdi(ppc_fp128 %a) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/ppcf128-3.ll b/test/CodeGen/PowerPC/ppcf128-3.ll
index 3a51f4d..5043b62 100644
--- a/test/CodeGen/PowerPC/ppcf128-3.ll
+++ b/test/CodeGen/PowerPC/ppcf128-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
%struct.stp_sequence = type { double, double }
define i32 @stp_sequence_set_short_data(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
diff --git a/test/CodeGen/PowerPC/ppcf128-4.ll b/test/CodeGen/PowerPC/ppcf128-4.ll
index 16d6178..104a25e 100644
--- a/test/CodeGen/PowerPC/ppcf128-4.ll
+++ b/test/CodeGen/PowerPC/ppcf128-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
define ppc_fp128 @__floatditf(i64 %u) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/pr3711_widen_bit.ll b/test/CodeGen/PowerPC/pr3711_widen_bit.ll
index e601e96..7abdeda 100644
--- a/test/CodeGen/PowerPC/pr3711_widen_bit.ll
+++ b/test/CodeGen/PowerPC/pr3711_widen_bit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; Test that causes an abort in expanding a bit convert due to missing support
; for widening.
diff --git a/test/CodeGen/PowerPC/private.ll b/test/CodeGen/PowerPC/private.ll
index 0f0d134..d6e6770 100644
--- a/test/CodeGen/PowerPC/private.ll
+++ b/test/CodeGen/PowerPC/private.ll
@@ -1,25 +1,23 @@
; Test to make sure that 'private' linkage is used correctly.
;
-; RUN: llvm-as < %s | llc -mtriple=powerpc-unknown-linux-gnu > %t
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu > %t
; RUN: grep .Lfoo: %t
; RUN: grep bl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
; RUN: grep lis.*\.Lbaz %t
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin > %t
+; RUN: llc < %s -mtriple=powerpc-apple-darwin > %t
; RUN: grep L_foo: %t
; RUN: grep bl.*\L_foo %t
; RUN: grep L_baz: %t
; RUN: grep lis.*\L_baz %t
-declare void @foo()
-
-define private void @foo() {
+define private void @foo() nounwind {
ret void
}
@baz = private global i32 4;
-define i32 @bar() {
+define i32 @bar() nounwind {
call void @foo()
%1 = load i32* @baz, align 4
ret i32 %1
diff --git a/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
index b86ed1a..e0ddb42 100644
--- a/test/CodeGen/PowerPC/reg-coalesce-simple.ll
+++ b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep or
+; RUN: llc < %s -march=ppc32 | not grep or
%struct.foo = type { i32, i32, [0 x i8] }
diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll
index f4cad34..9f8647d 100644
--- a/test/CodeGen/PowerPC/retaddr.ll
+++ b/test/CodeGen/PowerPC/retaddr.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep mflr
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lwz
-; RUN: llvm-as < %s | llc -march=ppc64 | grep {ld r., 16(r1)}
+; RUN: llc < %s -march=ppc32 | grep mflr
+; RUN: llc < %s -march=ppc32 | grep lwz
+; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)}
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/return-val-i128.ll b/test/CodeGen/PowerPC/return-val-i128.ll
index 27a5004..e14a438 100644
--- a/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/test/CodeGen/PowerPC/return-val-i128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
define i128 @__fixsfdi(float %a) {
entry:
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
index f8a42b5..6410c63 100644
--- a/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {or }
+; RUN: llc < %s -march=ppc32 | grep rlwimi
+; RUN: llc < %s -march=ppc32 | not grep {or }
; Make sure there are no register-register copies here.
diff --git a/test/CodeGen/PowerPC/rlwimi.ll b/test/CodeGen/PowerPC/rlwimi.ll
index 5e310bb..556ca3d 100644
--- a/test/CodeGen/PowerPC/rlwimi.ll
+++ b/test/CodeGen/PowerPC/rlwimi.ll
@@ -1,6 +1,6 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep and
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi | count 8
+; RUN: llc < %s -march=ppc32 | not grep and
+; RUN: llc < %s -march=ppc32 | grep rlwimi | count 8
define i32 @test1(i32 %x, i32 %y) {
entry:
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
index 33eaacf..59a3655 100644
--- a/test/CodeGen/PowerPC/rlwimi2.ll
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep rlwimi %t | count 3
; RUN: grep srwi %t | count 1
; RUN: not grep slwi %t
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index fedcfbf..05d37bf 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: llc < %s -march=ppc32 -stats |& \
; RUN: grep {Number of machine instrs printed} | grep 12
define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) {
diff --git a/test/CodeGen/PowerPC/rlwinm.ll b/test/CodeGen/PowerPC/rlwinm.ll
index 9d34865..699f6e7 100644
--- a/test/CodeGen/PowerPC/rlwinm.ll
+++ b/test/CodeGen/PowerPC/rlwinm.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: not grep and %t
; RUN: not grep srawi %t
; RUN: not grep srwi %t
diff --git a/test/CodeGen/PowerPC/rlwinm2.ll b/test/CodeGen/PowerPC/rlwinm2.ll
index 06ceaa2..46542d8 100644
--- a/test/CodeGen/PowerPC/rlwinm2.ll
+++ b/test/CodeGen/PowerPC/rlwinm2.ll
@@ -1,5 +1,5 @@
; All of these ands and shifts should be folded into rlw[i]nm instructions
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: not grep and %t
; RUN: not grep srawi %t
; RUN: not grep srwi %t
diff --git a/test/CodeGen/PowerPC/rotl-2.ll b/test/CodeGen/PowerPC/rotl-2.ll
index df10459..d32ef59 100644
--- a/test/CodeGen/PowerPC/rotl-2.ll
+++ b/test/CodeGen/PowerPC/rotl-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep or
+; RUN: llc < %s -march=ppc32 | grep rlwinm | count 4
+; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32 | not grep or
define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
%shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/rotl-64.ll b/test/CodeGen/PowerPC/rotl-64.ll
index 3963d9a..674c9e4 100644
--- a/test/CodeGen/PowerPC/rotl-64.ll
+++ b/test/CodeGen/PowerPC/rotl-64.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep rldicl
-; RUN: llvm-as < %s | llc -march=ppc64 | grep rldcl
+; RUN: llc < %s -march=ppc64 | grep rldicl
+; RUN: llc < %s -march=ppc64 | grep rldcl
; PR1613
define i64 @t1(i64 %A) {
diff --git a/test/CodeGen/PowerPC/rotl.ll b/test/CodeGen/PowerPC/rotl.ll
index aab5c83..56fc4a8 100644
--- a/test/CodeGen/PowerPC/rotl.ll
+++ b/test/CodeGen/PowerPC/rotl.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | count 2
+; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32 | grep rlwinm | count 2
define i32 @rotlw(i32 %x, i32 %sh) {
entry:
diff --git a/test/CodeGen/PowerPC/sections.ll b/test/CodeGen/PowerPC/sections.ll
new file mode 100644
index 0000000..1af3709
--- /dev/null
+++ b/test/CodeGen/PowerPC/sections.ll
@@ -0,0 +1,8 @@
+; Test to make sure that bss sections are printed with the '.section' directive.
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+
+@A = global i32 0
+
+; CHECK: .section .bss,"aw",@nobits
+; CHECK: .global A
+
diff --git a/test/CodeGen/PowerPC/select-cc.ll b/test/CodeGen/PowerPC/select-cc.ll
index f9464c4..ccc6489 100644
--- a/test/CodeGen/PowerPC/select-cc.ll
+++ b/test/CodeGen/PowerPC/select-cc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
; PR3011
define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind {
diff --git a/test/CodeGen/PowerPC/select_lt0.ll b/test/CodeGen/PowerPC/select_lt0.ll
index 86eb201..95ba84a 100644
--- a/test/CodeGen/PowerPC/select_lt0.ll
+++ b/test/CodeGen/PowerPC/select_lt0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep cmp
+; RUN: llc < %s -march=ppc32 | not grep cmp
define i32 @seli32_1(i32 %a) {
entry:
diff --git a/test/CodeGen/PowerPC/setcc_no_zext.ll b/test/CodeGen/PowerPC/setcc_no_zext.ll
index c31f35c..9b2036e 100644
--- a/test/CodeGen/PowerPC/setcc_no_zext.ll
+++ b/test/CodeGen/PowerPC/setcc_no_zext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
+; RUN: llc < %s -march=ppc32 | not grep rlwinm
define i32 @setcc_one_or_zero(i32* %a) {
entry:
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index 0f0afe9..688b29a 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {srwi r., r., 5}
define i32 @eq0(i32 %a) {
diff --git a/test/CodeGen/PowerPC/shift128.ll b/test/CodeGen/PowerPC/shift128.ll
index cf5b3fc..8e518c1 100644
--- a/test/CodeGen/PowerPC/shift128.ll
+++ b/test/CodeGen/PowerPC/shift128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep sld | count 5
+; RUN: llc < %s -march=ppc64 | grep sld | count 5
define i128 @foo_lshr(i128 %x, i128 %y) {
%r = lshr i128 %x, %y
diff --git a/test/CodeGen/PowerPC/shl_elim.ll b/test/CodeGen/PowerPC/shl_elim.ll
index 3dc4772..f177c4a 100644
--- a/test/CodeGen/PowerPC/shl_elim.ll
+++ b/test/CodeGen/PowerPC/shl_elim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep slwi
+; RUN: llc < %s -march=ppc32 | not grep slwi
define i32 @test1(i64 %a) {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/shl_sext.ll b/test/CodeGen/PowerPC/shl_sext.ll
index 61e5cdb..1f35eb4 100644
--- a/test/CodeGen/PowerPC/shl_sext.ll
+++ b/test/CodeGen/PowerPC/shl_sext.ll
@@ -1,5 +1,5 @@
; This test should not contain a sign extend
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
+; RUN: llc < %s -march=ppc32 | not grep extsb
define i32 @test(i32 %mode.0.i.0) {
%tmp.79 = trunc i32 %mode.0.i.0 to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/sign_ext_inreg1.ll b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
index 0e67f77..2679c8e 100644
--- a/test/CodeGen/PowerPC/sign_ext_inreg1.ll
+++ b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep srwi
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
+; RUN: llc < %s -march=ppc32 | grep srwi
+; RUN: llc < %s -march=ppc32 | not grep rlwimi
define i32 @baz(i64 %a) {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index e211e86..31bcee6 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {extsh\\|rlwinm}
+; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm}
declare i16 @foo() signext
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index 5c4a834b..c49b25c 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,6 +1,6 @@
; This cannot be a stfiwx
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
define void @test(float %F, i8* %P) {
%I = fptosi float %F to i32
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
index c4afb63..d1c3f52 100644
--- a/test/CodeGen/PowerPC/stfiwx.ll
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1
; RUN: grep stfiwx %t1
; RUN: not grep r1 %t1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
-; RUN: -o %t2 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
+; RUN: -o %t2
; RUN: not grep stfiwx %t2
; RUN: grep r1 %t2
diff --git a/test/CodeGen/PowerPC/store-load-fwd.ll b/test/CodeGen/PowerPC/store-load-fwd.ll
index 5cc4784..25663c1 100644
--- a/test/CodeGen/PowerPC/store-load-fwd.ll
+++ b/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep lwz
+; RUN: llc < %s -march=ppc32 | not grep lwz
define i32 @test(i32* %P) {
store i32 1, i32* %P
diff --git a/test/CodeGen/PowerPC/subc.ll b/test/CodeGen/PowerPC/subc.ll
index 4ac9596..5914dca 100644
--- a/test/CodeGen/PowerPC/subc.ll
+++ b/test/CodeGen/PowerPC/subc.ll
@@ -1,5 +1,5 @@
; All of these should be codegen'd without loading immediates
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep subfc %t | count 1
; RUN: grep subfe %t | count 1
; RUN: grep subfze %t | count 1
diff --git a/test/CodeGen/PowerPC/tailcall1-64.ll b/test/CodeGen/PowerPC/tailcall1-64.ll
index f39b40b..e9c83a5 100644
--- a/test/CodeGen/PowerPC/tailcall1-64.ll
+++ b/test/CodeGen/PowerPC/tailcall1-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -tailcallopt | grep TC_RETURNd8
+; RUN: llc < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
diff --git a/test/CodeGen/PowerPC/tailcall1.ll b/test/CodeGen/PowerPC/tailcall1.ll
index 1fc4b94..08f3392 100644
--- a/test/CodeGen/PowerPC/tailcall1.ll
+++ b/test/CodeGen/PowerPC/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -tailcallopt | grep TC_RETURN
+; RUN: llc < %s -march=ppc32 -tailcallopt | grep TC_RETURN
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
diff --git a/test/CodeGen/PowerPC/tailcallpic1.ll b/test/CodeGen/PowerPC/tailcallpic1.ll
index 678d366..f3f5028 100644
--- a/test/CodeGen/PowerPC/tailcallpic1.ll
+++ b/test/CodeGen/PowerPC/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
+; RUN: llc < %s -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
diff --git a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
new file mode 100644
index 0000000..8a1288a
--- /dev/null
+++ b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
@@ -0,0 +1,583 @@
+; RUN: llc < %s
+; PR4534
+
+; ModuleID = 'tango.net.ftp.FtpClient.bc'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin9.6.0"
+ %"byte[]" = type { i32, i8* }
+@.str167 = external constant [11 x i8] ; <[11 x i8]*> [#uses=1]
+@.str170 = external constant [11 x i8] ; <[11 x i8]*> [#uses=2]
+@.str171 = external constant [5 x i8] ; <[5 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%"byte[]")* @foo to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @foo(%"byte[]" %line_arg) {
+entry:
+ %line_arg830 = extractvalue %"byte[]" %line_arg, 0 ; <i32> [#uses=12]
+ %line_arg831 = extractvalue %"byte[]" %line_arg, 1 ; <i8*> [#uses=17]
+ %t5 = load i8* %line_arg831 ; <i8> [#uses=1]
+ br label %forcondi
+
+forcondi: ; preds = %forbodyi, %entry
+ %l.0i = phi i32 [ 10, %entry ], [ %t4i, %forbodyi ] ; <i32> [#uses=2]
+ %p.0i = phi i8* [ getelementptr ([11 x i8]* @.str167, i32 0, i32 -1), %entry ], [ %t7i, %forbodyi ] ; <i8*> [#uses=1]
+ %t4i = add i32 %l.0i, -1 ; <i32> [#uses=1]
+ %t5i = icmp eq i32 %l.0i, 0 ; <i1> [#uses=1]
+ br i1 %t5i, label %forcond.i, label %forbodyi
+
+forbodyi: ; preds = %forcondi
+ %t7i = getelementptr i8* %p.0i, i32 1 ; <i8*> [#uses=2]
+ %t8i = load i8* %t7i ; <i8> [#uses=1]
+ %t12i = icmp eq i8 %t8i, %t5 ; <i1> [#uses=1]
+ br i1 %t12i, label %forcond.i, label %forcondi
+
+forcond.i: ; preds = %forbody.i, %forbodyi, %forcondi
+ %storemerge.i = phi i32 [ %t106.i, %forbody.i ], [ 1, %forcondi ], [ 1, %forbodyi ] ; <i32> [#uses=1]
+ %t77.i286 = phi i1 [ %phit3, %forbody.i ], [ false, %forcondi ], [ false, %forbodyi ] ; <i1> [#uses=1]
+ br i1 %t77.i286, label %forcond.i295, label %forbody.i
+
+forbody.i: ; preds = %forcond.i
+ %t106.i = add i32 %storemerge.i, 1 ; <i32> [#uses=2]
+ %phit3 = icmp ugt i32 %t106.i, 3 ; <i1> [#uses=1]
+ br label %forcond.i
+
+forcond.i295: ; preds = %forbody.i301, %forcond.i
+ %storemerge.i292 = phi i32 [ %t106.i325, %forbody.i301 ], [ 4, %forcond.i ] ; <i32> [#uses=1]
+ %t77.i293 = phi i1 [ %phit2, %forbody.i301 ], [ false, %forcond.i ] ; <i1> [#uses=1]
+ br i1 %t77.i293, label %forcond.i332, label %forbody.i301
+
+forbody.i301: ; preds = %forcond.i295
+ %t106.i325 = add i32 %storemerge.i292, 1 ; <i32> [#uses=2]
+ %phit2 = icmp ugt i32 %t106.i325, 6 ; <i1> [#uses=1]
+ br label %forcond.i295
+
+forcond.i332: ; preds = %forbody.i338, %forcond.i295
+ %storemerge.i329 = phi i32 [ %t106.i362, %forbody.i338 ], [ 7, %forcond.i295 ] ; <i32> [#uses=3]
+ %t77.i330 = phi i1 [ %phit1, %forbody.i338 ], [ false, %forcond.i295 ] ; <i1> [#uses=1]
+ br i1 %t77.i330, label %wcond.i370, label %forbody.i338
+
+forbody.i338: ; preds = %forcond.i332
+ %t106.i362 = add i32 %storemerge.i329, 1 ; <i32> [#uses=2]
+ %phit1 = icmp ugt i32 %t106.i362, 9 ; <i1> [#uses=1]
+ br label %forcond.i332
+
+wcond.i370: ; preds = %wbody.i372, %forcond.i332
+ %.frame.0.11 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ] ; <i32> [#uses=2]
+ %t3.i368 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ] ; <i32> [#uses=5]
+ %t4.i369 = icmp ult i32 %t3.i368, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i369, label %andand.i378, label %wcond22.i383
+
+wbody.i372: ; preds = %andand.i378
+ %t18.i371.c = add i32 %t3.i368, 1 ; <i32> [#uses=2]
+ br label %wcond.i370
+
+andand.i378: ; preds = %wcond.i370
+ %t11.i375 = getelementptr i8* %line_arg831, i32 %t3.i368 ; <i8*> [#uses=1]
+ %t12.i376 = load i8* %t11.i375 ; <i8> [#uses=1]
+ %t14.i377 = icmp eq i8 %t12.i376, 32 ; <i1> [#uses=1]
+ br i1 %t14.i377, label %wbody.i372, label %wcond22.i383
+
+wcond22.i383: ; preds = %wbody23.i385, %andand.i378, %wcond.i370
+ %.frame.0.10 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %.frame.0.11, %wcond.i370 ], [ %.frame.0.11, %andand.i378 ] ; <i32> [#uses=2]
+ %t49.i381 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %t3.i368, %wcond.i370 ], [ %t3.i368, %andand.i378 ] ; <i32> [#uses=5]
+ %t32.i382 = icmp ult i32 %t49.i381, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i382, label %andand33.i391, label %wcond54.i396
+
+wbody23.i385: ; preds = %andand33.i391
+ %t50.i384 = add i32 %t49.i381, 1 ; <i32> [#uses=2]
+ br label %wcond22.i383
+
+andand33.i391: ; preds = %wcond22.i383
+ %t42.i388 = getelementptr i8* %line_arg831, i32 %t49.i381 ; <i8*> [#uses=1]
+ %t43.i389 = load i8* %t42.i388 ; <i8> [#uses=1]
+ %t45.i390 = icmp eq i8 %t43.i389, 32 ; <i1> [#uses=1]
+ br i1 %t45.i390, label %wcond54.i396, label %wbody23.i385
+
+wcond54.i396: ; preds = %wbody55.i401, %andand33.i391, %wcond22.i383
+ %.frame.0.9 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %.frame.0.10, %wcond22.i383 ], [ %.frame.0.10, %andand33.i391 ] ; <i32> [#uses=2]
+ %t81.i394 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %t49.i381, %wcond22.i383 ], [ %t49.i381, %andand33.i391 ] ; <i32> [#uses=3]
+ %t64.i395 = icmp ult i32 %t81.i394, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i395, label %andand65.i407, label %wcond.i716
+
+wbody55.i401: ; preds = %andand65.i407
+ %t82.i400 = add i32 %t81.i394, 1 ; <i32> [#uses=2]
+ br label %wcond54.i396
+
+andand65.i407: ; preds = %wcond54.i396
+ %t74.i404 = getelementptr i8* %line_arg831, i32 %t81.i394 ; <i8*> [#uses=1]
+ %t75.i405 = load i8* %t74.i404 ; <i8> [#uses=1]
+ %t77.i406 = icmp eq i8 %t75.i405, 32 ; <i1> [#uses=1]
+ br i1 %t77.i406, label %wbody55.i401, label %wcond.i716
+
+wcond.i716: ; preds = %wbody.i717, %andand65.i407, %wcond54.i396
+ %.frame.0.0 = phi i32 [ %t18.i.c829, %wbody.i717 ], [ %.frame.0.9, %wcond54.i396 ], [ %.frame.0.9, %andand65.i407 ] ; <i32> [#uses=7]
+ %t4.i715 = icmp ult i32 %.frame.0.0, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i715, label %andand.i721, label %wcond22.i724
+
+wbody.i717: ; preds = %andand.i721
+ %t18.i.c829 = add i32 %.frame.0.0, 1 ; <i32> [#uses=1]
+ br label %wcond.i716
+
+andand.i721: ; preds = %wcond.i716
+ %t11.i718 = getelementptr i8* %line_arg831, i32 %.frame.0.0 ; <i8*> [#uses=1]
+ %t12.i719 = load i8* %t11.i718 ; <i8> [#uses=1]
+ %t14.i720 = icmp eq i8 %t12.i719, 32 ; <i1> [#uses=1]
+ br i1 %t14.i720, label %wbody.i717, label %wcond22.i724
+
+wcond22.i724: ; preds = %wbody23.i726, %andand.i721, %wcond.i716
+ %.frame.0.1 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ] ; <i32> [#uses=2]
+ %t49.i722 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ] ; <i32> [#uses=5]
+ %t32.i723 = icmp ult i32 %t49.i722, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i723, label %andand33.i731, label %wcond54.i734
+
+wbody23.i726: ; preds = %andand33.i731
+ %t50.i725 = add i32 %t49.i722, 1 ; <i32> [#uses=2]
+ br label %wcond22.i724
+
+andand33.i731: ; preds = %wcond22.i724
+ %t42.i728 = getelementptr i8* %line_arg831, i32 %t49.i722 ; <i8*> [#uses=1]
+ %t43.i729 = load i8* %t42.i728 ; <i8> [#uses=1]
+ %t45.i730 = icmp eq i8 %t43.i729, 32 ; <i1> [#uses=1]
+ br i1 %t45.i730, label %wcond54.i734, label %wbody23.i726
+
+wcond54.i734: ; preds = %wbody55.i736, %andand33.i731, %wcond22.i724
+ %.frame.0.2 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %.frame.0.1, %wcond22.i724 ], [ %.frame.0.1, %andand33.i731 ] ; <i32> [#uses=2]
+ %t81.i732 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %t49.i722, %wcond22.i724 ], [ %t49.i722, %andand33.i731 ] ; <i32> [#uses=3]
+ %t64.i733 = icmp ult i32 %t81.i732, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i733, label %andand65.i740, label %wcond.i750
+
+wbody55.i736: ; preds = %andand65.i740
+ %t82.i735 = add i32 %t81.i732, 1 ; <i32> [#uses=2]
+ br label %wcond54.i734
+
+andand65.i740: ; preds = %wcond54.i734
+ %t74.i737 = getelementptr i8* %line_arg831, i32 %t81.i732 ; <i8*> [#uses=1]
+ %t75.i738 = load i8* %t74.i737 ; <i8> [#uses=1]
+ %t77.i739 = icmp eq i8 %t75.i738, 32 ; <i1> [#uses=1]
+ br i1 %t77.i739, label %wbody55.i736, label %wcond.i750
+
+wcond.i750: ; preds = %wbody.i752, %andand65.i740, %wcond54.i734
+ %.frame.0.3 = phi i32 [ %t18.i751.c, %wbody.i752 ], [ %.frame.0.2, %wcond54.i734 ], [ %.frame.0.2, %andand65.i740 ] ; <i32> [#uses=11]
+ %t4.i749 = icmp ult i32 %.frame.0.3, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i749, label %andand.i758, label %wcond22.i761
+
+wbody.i752: ; preds = %andand.i758
+ %t18.i751.c = add i32 %.frame.0.3, 1 ; <i32> [#uses=1]
+ br label %wcond.i750
+
+andand.i758: ; preds = %wcond.i750
+ %t11.i755 = getelementptr i8* %line_arg831, i32 %.frame.0.3 ; <i8*> [#uses=1]
+ %t12.i756 = load i8* %t11.i755 ; <i8> [#uses=1]
+ %t14.i757 = icmp eq i8 %t12.i756, 32 ; <i1> [#uses=1]
+ br i1 %t14.i757, label %wbody.i752, label %wcond22.i761
+
+wcond22.i761: ; preds = %wbody23.i763, %andand.i758, %wcond.i750
+ %.frame.0.4 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ] ; <i32> [#uses=2]
+ %t49.i759 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ] ; <i32> [#uses=7]
+ %t32.i760 = icmp ult i32 %t49.i759, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i760, label %andand33.i769, label %wcond54.i773
+
+wbody23.i763: ; preds = %andand33.i769
+ %t50.i762 = add i32 %t49.i759, 1 ; <i32> [#uses=2]
+ br label %wcond22.i761
+
+andand33.i769: ; preds = %wcond22.i761
+ %t42.i766 = getelementptr i8* %line_arg831, i32 %t49.i759 ; <i8*> [#uses=1]
+ %t43.i767 = load i8* %t42.i766 ; <i8> [#uses=1]
+ %t45.i768 = icmp eq i8 %t43.i767, 32 ; <i1> [#uses=1]
+ br i1 %t45.i768, label %wcond54.i773, label %wbody23.i763
+
+wcond54.i773: ; preds = %wbody55.i775, %andand33.i769, %wcond22.i761
+ %.frame.0.5 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %.frame.0.4, %wcond22.i761 ], [ %.frame.0.4, %andand33.i769 ] ; <i32> [#uses=1]
+ %t81.i770 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %t49.i759, %wcond22.i761 ], [ %t49.i759, %andand33.i769 ] ; <i32> [#uses=3]
+ %t64.i771 = icmp ult i32 %t81.i770, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i771, label %andand65.i780, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+
+wbody55.i775: ; preds = %andand65.i780
+ %t82.i774 = add i32 %t81.i770, 1 ; <i32> [#uses=2]
+ br label %wcond54.i773
+
+andand65.i780: ; preds = %wcond54.i773
+ %t74.i777 = getelementptr i8* %line_arg831, i32 %t81.i770 ; <i8*> [#uses=1]
+ %t75.i778 = load i8* %t74.i777 ; <i8> [#uses=1]
+ %t77.i779 = icmp eq i8 %t75.i778, 32 ; <i1> [#uses=1]
+ br i1 %t77.i779, label %wbody55.i775, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+
+Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786: ; preds = %andand65.i780, %wcond54.i773
+ %t89.i782 = getelementptr i8* %line_arg831, i32 %.frame.0.3 ; <i8*> [#uses=4]
+ %t90.i783 = sub i32 %t49.i759, %.frame.0.3 ; <i32> [#uses=2]
+ br label %wcond.i792
+
+wcond.i792: ; preds = %wbody.i794, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+ %.frame.0.6 = phi i32 [ %.frame.0.5, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786 ], [ %t18.i793.c, %wbody.i794 ] ; <i32> [#uses=9]
+ %t4.i791 = icmp ult i32 %.frame.0.6, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t4.i791, label %andand.i800, label %wcond22.i803
+
+wbody.i794: ; preds = %andand.i800
+ %t18.i793.c = add i32 %.frame.0.6, 1 ; <i32> [#uses=1]
+ br label %wcond.i792
+
+andand.i800: ; preds = %wcond.i792
+ %t11.i797 = getelementptr i8* %line_arg831, i32 %.frame.0.6 ; <i8*> [#uses=1]
+ %t12.i798 = load i8* %t11.i797 ; <i8> [#uses=1]
+ %t14.i799 = icmp eq i8 %t12.i798, 32 ; <i1> [#uses=1]
+ br i1 %t14.i799, label %wbody.i794, label %wcond22.i803
+
+wcond22.i803: ; preds = %wbody23.i805, %andand.i800, %wcond.i792
+ %t49.i801 = phi i32 [ %t50.i804, %wbody23.i805 ], [ %.frame.0.6, %wcond.i792 ], [ %.frame.0.6, %andand.i800 ] ; <i32> [#uses=7]
+ %t32.i802 = icmp ult i32 %t49.i801, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t32.i802, label %andand33.i811, label %wcond54.i815
+
+wbody23.i805: ; preds = %andand33.i811
+ %t50.i804 = add i32 %t49.i801, 1 ; <i32> [#uses=1]
+ br label %wcond22.i803
+
+andand33.i811: ; preds = %wcond22.i803
+ %t42.i808 = getelementptr i8* %line_arg831, i32 %t49.i801 ; <i8*> [#uses=1]
+ %t43.i809 = load i8* %t42.i808 ; <i8> [#uses=1]
+ %t45.i810 = icmp eq i8 %t43.i809, 32 ; <i1> [#uses=1]
+ br i1 %t45.i810, label %wcond54.i815, label %wbody23.i805
+
+wcond54.i815: ; preds = %wbody55.i817, %andand33.i811, %wcond22.i803
+ %t81.i812 = phi i32 [ %t82.i816, %wbody55.i817 ], [ %t49.i801, %wcond22.i803 ], [ %t49.i801, %andand33.i811 ] ; <i32> [#uses=3]
+ %t64.i813 = icmp ult i32 %t81.i812, %line_arg830 ; <i1> [#uses=1]
+ br i1 %t64.i813, label %andand65.i822, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+
+wbody55.i817: ; preds = %andand65.i822
+ %t82.i816 = add i32 %t81.i812, 1 ; <i32> [#uses=1]
+ br label %wcond54.i815
+
+andand65.i822: ; preds = %wcond54.i815
+ %t74.i819 = getelementptr i8* %line_arg831, i32 %t81.i812 ; <i8*> [#uses=1]
+ %t75.i820 = load i8* %t74.i819 ; <i8> [#uses=1]
+ %t77.i821 = icmp eq i8 %t75.i820, 32 ; <i1> [#uses=1]
+ br i1 %t77.i821, label %wbody55.i817, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+
+Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828: ; preds = %andand65.i822, %wcond54.i815
+ %t89.i824 = getelementptr i8* %line_arg831, i32 %.frame.0.6 ; <i8*> [#uses=4]
+ %t90.i825 = sub i32 %t49.i801, %.frame.0.6 ; <i32> [#uses=2]
+ %t63 = load i8* %t89.i824 ; <i8> [#uses=2]
+ br label %forcondi622
+
+forcondi622: ; preds = %forbodyi626, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+ %l.0i618 = phi i32 [ 10, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t4i620, %forbodyi626 ] ; <i32> [#uses=2]
+ %p.0i619 = phi i8* [ getelementptr ([11 x i8]* @.str170, i32 0, i32 -1), %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t7i623, %forbodyi626 ] ; <i8*> [#uses=1]
+ %t4i620 = add i32 %l.0i618, -1 ; <i32> [#uses=1]
+ %t5i621 = icmp eq i32 %l.0i618, 0 ; <i1> [#uses=1]
+ br i1 %t5i621, label %if65, label %forbodyi626
+
+forbodyi626: ; preds = %forcondi622
+ %t7i623 = getelementptr i8* %p.0i619, i32 1 ; <i8*> [#uses=3]
+ %t8i624 = load i8* %t7i623 ; <i8> [#uses=1]
+ %t12i625 = icmp eq i8 %t8i624, %t63 ; <i1> [#uses=1]
+ br i1 %t12i625, label %ifi630, label %forcondi622
+
+ifi630: ; preds = %forbodyi626
+ %t15i627 = ptrtoint i8* %t7i623 to i32 ; <i32> [#uses=1]
+ %t17i629 = sub i32 %t15i627, ptrtoint ([11 x i8]* @.str170 to i32) ; <i32> [#uses=1]
+ %phit636 = icmp eq i32 %t17i629, 10 ; <i1> [#uses=1]
+ br i1 %phit636, label %if65, label %e67
+
+if65: ; preds = %ifi630, %forcondi622
+ %t4i532 = icmp eq i32 %t49.i759, %.frame.0.3 ; <i1> [#uses=1]
+ br i1 %t4i532, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %forcondi539
+
+forcondi539: ; preds = %zi546, %if65
+ %sign.1.i533 = phi i1 [ %sign.0.i543, %zi546 ], [ false, %if65 ] ; <i1> [#uses=2]
+ %l.0i534 = phi i32 [ %t33i545, %zi546 ], [ %t90.i783, %if65 ] ; <i32> [#uses=3]
+ %p.0i535 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ] ; <i8*> [#uses=6]
+ %c.0.ini536 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ] ; <i8*> [#uses=1]
+ %c.0i537 = load i8* %c.0.ini536 ; <i8> [#uses=2]
+ %t8i538 = icmp eq i32 %l.0i534, 0 ; <i1> [#uses=1]
+ br i1 %t8i538, label %endfori550, label %forbodyi540
+
+forbodyi540: ; preds = %forcondi539
+ switch i8 %c.0i537, label %endfori550 [
+ i8 32, label %zi546
+ i8 9, label %zi546
+ i8 45, label %if20i541
+ i8 43, label %if26i542
+ ]
+
+if20i541: ; preds = %forbodyi540
+ br label %zi546
+
+if26i542: ; preds = %forbodyi540
+ br label %zi546
+
+zi546: ; preds = %if26i542, %if20i541, %forbodyi540, %forbodyi540
+ %sign.0.i543 = phi i1 [ false, %if26i542 ], [ true, %if20i541 ], [ %sign.1.i533, %forbodyi540 ], [ %sign.1.i533, %forbodyi540 ] ; <i1> [#uses=1]
+ %t30i544 = getelementptr i8* %p.0i535, i32 1 ; <i8*> [#uses=2]
+ %t33i545 = add i32 %l.0i534, -1 ; <i32> [#uses=1]
+ br label %forcondi539
+
+endfori550: ; preds = %forbodyi540, %forcondi539
+ %t37i547 = icmp eq i8 %c.0i537, 48 ; <i1> [#uses=1]
+ %t39i548 = icmp sgt i32 %l.0i534, 1 ; <i1> [#uses=1]
+ %or.condi549 = and i1 %t37i547, %t39i548 ; <i1> [#uses=1]
+ br i1 %or.condi549, label %if40i554, label %endif41i564
+
+if40i554: ; preds = %endfori550
+ %t43i551 = getelementptr i8* %p.0i535, i32 1 ; <i8*> [#uses=2]
+ %t44i552 = load i8* %t43i551 ; <i8> [#uses=1]
+ %t45i553 = zext i8 %t44i552 to i32 ; <i32> [#uses=1]
+ switch i32 %t45i553, label %endif41i564 [
+ i32 120, label %case46i556
+ i32 88, label %case46i556
+ i32 98, label %case51i558
+ i32 66, label %case51i558
+ i32 111, label %case56i560
+ i32 79, label %case56i560
+ ]
+
+case46i556: ; preds = %if40i554, %if40i554
+ %t48i555 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i564
+
+case51i558: ; preds = %if40i554, %if40i554
+ %t53i557 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i564
+
+case56i560: ; preds = %if40i554, %if40i554
+ %t58i559 = getelementptr i8* %p.0i535, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i564
+
+endif41i564: ; preds = %case56i560, %case51i558, %case46i556, %if40i554, %endfori550
+ %r.0i561 = phi i32 [ 0, %if40i554 ], [ 8, %case56i560 ], [ 2, %case51i558 ], [ 16, %case46i556 ], [ 0, %endfori550 ] ; <i32> [#uses=2]
+ %p.2i562 = phi i8* [ %t43i551, %if40i554 ], [ %t58i559, %case56i560 ], [ %t53i557, %case51i558 ], [ %t48i555, %case46i556 ], [ %p.0i535, %endfori550 ] ; <i8*> [#uses=2]
+ %t63i563 = icmp eq i32 %r.0i561, 0 ; <i1> [#uses=1]
+ br i1 %t63i563, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %if70i568
+
+if70i568: ; preds = %endif41i564
+ br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
+
+Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576: ; preds = %if70i568, %endif41i564, %if65
+ %radix.0.i570 = phi i32 [ 0, %if65 ], [ %r.0i561, %if70i568 ], [ 10, %endif41i564 ] ; <i32> [#uses=2]
+ %p.1i571 = phi i8* [ %p.2i562, %if70i568 ], [ %t89.i782, %if65 ], [ %p.2i562, %endif41i564 ] ; <i8*> [#uses=1]
+ %t84i572 = ptrtoint i8* %p.1i571 to i32 ; <i32> [#uses=1]
+ %t85i573 = ptrtoint i8* %t89.i782 to i32 ; <i32> [#uses=1]
+ %t86i574 = sub i32 %t84i572, %t85i573 ; <i32> [#uses=2]
+ %t6.i575 = sub i32 %t90.i783, %t86i574 ; <i32> [#uses=1]
+ %t59i604 = zext i32 %radix.0.i570 to i64 ; <i64> [#uses=1]
+ br label %fcondi581
+
+fcondi581: ; preds = %if55i610, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
+ %value.0i577 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t65i607, %if55i610 ] ; <i64> [#uses=1]
+ %fkey.0i579 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t70i609, %if55i610 ] ; <i32> [#uses=3]
+ %t3i580 = icmp ult i32 %fkey.0i579, %t6.i575 ; <i1> [#uses=1]
+ br i1 %t3i580, label %fbodyi587, label %wcond.i422
+
+fbodyi587: ; preds = %fcondi581
+ %t5.s.i582 = add i32 %t86i574, %fkey.0i579 ; <i32> [#uses=1]
+ %t89.i782.s = add i32 %.frame.0.3, %t5.s.i582 ; <i32> [#uses=1]
+ %t5i583 = getelementptr i8* %line_arg831, i32 %t89.i782.s ; <i8*> [#uses=1]
+ %t6i584 = load i8* %t5i583 ; <i8> [#uses=6]
+ %t6.off84i585 = add i8 %t6i584, -48 ; <i8> [#uses=1]
+ %or.cond.i28.i586 = icmp ugt i8 %t6.off84i585, 9 ; <i1> [#uses=1]
+ br i1 %or.cond.i28.i586, label %ei590, label %endifi603
+
+ei590: ; preds = %fbodyi587
+ %t6.off83i588 = add i8 %t6i584, -97 ; <i8> [#uses=1]
+ %or.cond81i589 = icmp ugt i8 %t6.off83i588, 25 ; <i1> [#uses=1]
+ br i1 %or.cond81i589, label %e24i595, label %if22i592
+
+if22i592: ; preds = %ei590
+ %t27i591 = add i8 %t6i584, -39 ; <i8> [#uses=1]
+ br label %endifi603
+
+e24i595: ; preds = %ei590
+ %t6.offi593 = add i8 %t6i584, -65 ; <i8> [#uses=1]
+ %or.cond82i594 = icmp ugt i8 %t6.offi593, 25 ; <i1> [#uses=1]
+ br i1 %or.cond82i594, label %wcond.i422, label %if39i597
+
+if39i597: ; preds = %e24i595
+ %t44.i29.i596 = add i8 %t6i584, -7 ; <i8> [#uses=1]
+ br label %endifi603
+
+endifi603: ; preds = %if39i597, %if22i592, %fbodyi587
+ %c.0.i30.i598 = phi i8 [ %t27i591, %if22i592 ], [ %t44.i29.i596, %if39i597 ], [ %t6i584, %fbodyi587 ] ; <i8> [#uses=1]
+ %t48.i31.i599 = zext i8 %c.0.i30.i598 to i32 ; <i32> [#uses=1]
+ %t49i600 = add i32 %t48.i31.i599, 208 ; <i32> [#uses=1]
+ %t52i601 = and i32 %t49i600, 255 ; <i32> [#uses=2]
+ %t54i602 = icmp ult i32 %t52i601, %radix.0.i570 ; <i1> [#uses=1]
+ br i1 %t54i602, label %if55i610, label %wcond.i422
+
+if55i610: ; preds = %endifi603
+ %t61i605 = mul i64 %value.0i577, %t59i604 ; <i64> [#uses=1]
+ %t64i606 = zext i32 %t52i601 to i64 ; <i64> [#uses=1]
+ %t65i607 = add i64 %t61i605, %t64i606 ; <i64> [#uses=1]
+ %t70i609 = add i32 %fkey.0i579, 1 ; <i32> [#uses=1]
+ br label %fcondi581
+
+e67: ; preds = %ifi630
+ %t4i447 = icmp eq i32 %t49.i801, %.frame.0.6 ; <i1> [#uses=1]
+ br i1 %t4i447, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %forcondi454
+
+forcondi454: ; preds = %zi461, %e67
+ %c.0i452 = phi i8 [ %c.0i452.pre, %zi461 ], [ %t63, %e67 ] ; <i8> [#uses=2]
+ %sign.1.i448 = phi i1 [ %sign.0.i458, %zi461 ], [ false, %e67 ] ; <i1> [#uses=2]
+ %l.0i449 = phi i32 [ %t33i460, %zi461 ], [ %t90.i825, %e67 ] ; <i32> [#uses=3]
+ %p.0i450 = phi i8* [ %t30i459, %zi461 ], [ %t89.i824, %e67 ] ; <i8*> [#uses=5]
+ %t8i453 = icmp eq i32 %l.0i449, 0 ; <i1> [#uses=1]
+ br i1 %t8i453, label %endfori465, label %forbodyi455
+
+forbodyi455: ; preds = %forcondi454
+ switch i8 %c.0i452, label %endfori465 [
+ i8 32, label %zi461
+ i8 9, label %zi461
+ i8 45, label %if20i456
+ i8 43, label %if26i457
+ ]
+
+if20i456: ; preds = %forbodyi455
+ br label %zi461
+
+if26i457: ; preds = %forbodyi455
+ br label %zi461
+
+zi461: ; preds = %if26i457, %if20i456, %forbodyi455, %forbodyi455
+ %sign.0.i458 = phi i1 [ false, %if26i457 ], [ true, %if20i456 ], [ %sign.1.i448, %forbodyi455 ], [ %sign.1.i448, %forbodyi455 ] ; <i1> [#uses=1]
+ %t30i459 = getelementptr i8* %p.0i450, i32 1 ; <i8*> [#uses=2]
+ %t33i460 = add i32 %l.0i449, -1 ; <i32> [#uses=1]
+ %c.0i452.pre = load i8* %t30i459 ; <i8> [#uses=1]
+ br label %forcondi454
+
+endfori465: ; preds = %forbodyi455, %forcondi454
+ %t37i462 = icmp eq i8 %c.0i452, 48 ; <i1> [#uses=1]
+ %t39i463 = icmp sgt i32 %l.0i449, 1 ; <i1> [#uses=1]
+ %or.condi464 = and i1 %t37i462, %t39i463 ; <i1> [#uses=1]
+ br i1 %or.condi464, label %if40i469, label %endif41i479
+
+if40i469: ; preds = %endfori465
+ %t43i466 = getelementptr i8* %p.0i450, i32 1 ; <i8*> [#uses=2]
+ %t44i467 = load i8* %t43i466 ; <i8> [#uses=1]
+ %t45i468 = zext i8 %t44i467 to i32 ; <i32> [#uses=1]
+ switch i32 %t45i468, label %endif41i479 [
+ i32 120, label %case46i471
+ i32 111, label %case56i475
+ ]
+
+case46i471: ; preds = %if40i469
+ %t48i470 = getelementptr i8* %p.0i450, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i479
+
+case56i475: ; preds = %if40i469
+ %t58i474 = getelementptr i8* %p.0i450, i32 2 ; <i8*> [#uses=1]
+ br label %endif41i479
+
+endif41i479: ; preds = %case56i475, %case46i471, %if40i469, %endfori465
+ %r.0i476 = phi i32 [ 0, %if40i469 ], [ 8, %case56i475 ], [ 16, %case46i471 ], [ 0, %endfori465 ] ; <i32> [#uses=2]
+ %p.2i477 = phi i8* [ %t43i466, %if40i469 ], [ %t58i474, %case56i475 ], [ %t48i470, %case46i471 ], [ %p.0i450, %endfori465 ] ; <i8*> [#uses=2]
+ %t63i478 = icmp eq i32 %r.0i476, 0 ; <i1> [#uses=1]
+ br i1 %t63i478, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %if70i483
+
+if70i483: ; preds = %endif41i479
+ br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
+
+Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491: ; preds = %if70i483, %endif41i479, %e67
+ %radix.0.i485 = phi i32 [ 0, %e67 ], [ %r.0i476, %if70i483 ], [ 10, %endif41i479 ] ; <i32> [#uses=2]
+ %p.1i486 = phi i8* [ %p.2i477, %if70i483 ], [ %t89.i824, %e67 ], [ %p.2i477, %endif41i479 ] ; <i8*> [#uses=1]
+ %t84i487 = ptrtoint i8* %p.1i486 to i32 ; <i32> [#uses=1]
+ %t85i488 = ptrtoint i8* %t89.i824 to i32 ; <i32> [#uses=1]
+ %t86i489 = sub i32 %t84i487, %t85i488 ; <i32> [#uses=2]
+ %ttt = sub i32 %t90.i825, %t86i489 ; <i32> [#uses=1]
+ %t59i519 = zext i32 %radix.0.i485 to i64 ; <i64> [#uses=1]
+ br label %fcondi496
+
+fcondi496: ; preds = %if55i525, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
+ %value.0i492 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t65i522, %if55i525 ] ; <i64> [#uses=1]
+ %fkey.0i494 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t70i524, %if55i525 ] ; <i32> [#uses=3]
+ %t3i495 = icmp ult i32 %fkey.0i494, %ttt ; <i1> [#uses=1]
+ br i1 %t3i495, label %fbodyi502, label %wcond.i422
+
+fbodyi502: ; preds = %fcondi496
+ %t5.s.i497 = add i32 %t86i489, %fkey.0i494 ; <i32> [#uses=1]
+ %t89.i824.s = add i32 %.frame.0.6, %t5.s.i497 ; <i32> [#uses=1]
+ %t5i498 = getelementptr i8* %line_arg831, i32 %t89.i824.s ; <i8*> [#uses=1]
+ %t6i499 = load i8* %t5i498 ; <i8> [#uses=6]
+ %t6.off84i500 = add i8 %t6i499, -48 ; <i8> [#uses=1]
+ %or.cond.i28.i501 = icmp ugt i8 %t6.off84i500, 9 ; <i1> [#uses=1]
+ br i1 %or.cond.i28.i501, label %ei505, label %endifi518
+
+ei505: ; preds = %fbodyi502
+ %t6.off83i503 = add i8 %t6i499, -97 ; <i8> [#uses=1]
+ %or.cond81i504 = icmp ugt i8 %t6.off83i503, 25 ; <i1> [#uses=1]
+ br i1 %or.cond81i504, label %e24i510, label %if22i507
+
+if22i507: ; preds = %ei505
+ %t27i506 = add i8 %t6i499, -39 ; <i8> [#uses=1]
+ br label %endifi518
+
+e24i510: ; preds = %ei505
+ %t6.offi508 = add i8 %t6i499, -65 ; <i8> [#uses=1]
+ %or.cond82i509 = icmp ugt i8 %t6.offi508, 25 ; <i1> [#uses=1]
+ br i1 %or.cond82i509, label %wcond.i422, label %if39i512
+
+if39i512: ; preds = %e24i510
+ %t44.i29.i511 = add i8 %t6i499, -7 ; <i8> [#uses=1]
+ br label %endifi518
+
+endifi518: ; preds = %if39i512, %if22i507, %fbodyi502
+ %c.0.i30.i513 = phi i8 [ %t27i506, %if22i507 ], [ %t44.i29.i511, %if39i512 ], [ %t6i499, %fbodyi502 ] ; <i8> [#uses=1]
+ %t48.i31.i514 = zext i8 %c.0.i30.i513 to i32 ; <i32> [#uses=1]
+ %t49i515 = add i32 %t48.i31.i514, 208 ; <i32> [#uses=1]
+ %t52i516 = and i32 %t49i515, 255 ; <i32> [#uses=2]
+ %t54i517 = icmp ult i32 %t52i516, %radix.0.i485 ; <i1> [#uses=1]
+ br i1 %t54i517, label %if55i525, label %wcond.i422
+
+if55i525: ; preds = %endifi518
+ %t61i520 = mul i64 %value.0i492, %t59i519 ; <i64> [#uses=1]
+ %t64i521 = zext i32 %t52i516 to i64 ; <i64> [#uses=1]
+ %t65i522 = add i64 %t61i520, %t64i521 ; <i64> [#uses=1]
+ %t70i524 = add i32 %fkey.0i494, 1 ; <i32> [#uses=1]
+ br label %fcondi496
+
+wcond.i422: ; preds = %e40.i, %endifi518, %e24i510, %fcondi496, %endifi603, %e24i595, %fcondi581
+ %sarg60.pn.i = phi i8* [ %p.0.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ] ; <i8*> [#uses=3]
+ %start_arg.pn.i = phi i32 [ %t49.i443, %e40.i ], [ 0, %fcondi496 ], [ 0, %e24i510 ], [ 0, %endifi518 ], [ 0, %endifi603 ], [ 0, %e24i595 ], [ 0, %fcondi581 ] ; <i32> [#uses=3]
+ %extent.0.i = phi i32 [ %t51.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ] ; <i32> [#uses=3]
+ %p.0.i = getelementptr i8* %sarg60.pn.i, i32 %start_arg.pn.i ; <i8*> [#uses=2]
+ %p.0.s63.i = add i32 %start_arg.pn.i, -1 ; <i32> [#uses=1]
+ %t2i424 = getelementptr i8* %sarg60.pn.i, i32 %p.0.s63.i ; <i8*> [#uses=1]
+ br label %forcondi430
+
+forcondi430: ; preds = %forbodyi434, %wcond.i422
+ %l.0i426 = phi i32 [ %extent.0.i, %wcond.i422 ], [ %t4i428, %forbodyi434 ] ; <i32> [#uses=2]
+ %p.0i427 = phi i8* [ %t2i424, %wcond.i422 ], [ %t7i431, %forbodyi434 ] ; <i8*> [#uses=1]
+ %t4i428 = add i32 %l.0i426, -1 ; <i32> [#uses=1]
+ %t5i429 = icmp eq i32 %l.0i426, 0 ; <i1> [#uses=1]
+ br i1 %t5i429, label %e.i441, label %forbodyi434
+
+forbodyi434: ; preds = %forcondi430
+ %t7i431 = getelementptr i8* %p.0i427, i32 1 ; <i8*> [#uses=3]
+ %t8i432 = load i8* %t7i431 ; <i8> [#uses=1]
+ %t12i433 = icmp eq i8 %t8i432, 32 ; <i1> [#uses=1]
+ br i1 %t12i433, label %ifi438, label %forcondi430
+
+ifi438: ; preds = %forbodyi434
+ %t15i435 = ptrtoint i8* %t7i431 to i32 ; <i32> [#uses=1]
+ %t16i436 = ptrtoint i8* %p.0.i to i32 ; <i32> [#uses=1]
+ %t17i437 = sub i32 %t15i435, %t16i436 ; <i32> [#uses=1]
+ br label %e.i441
+
+e.i441: ; preds = %ifi438, %forcondi430
+ %t2561.i = phi i32 [ %t17i437, %ifi438 ], [ %extent.0.i, %forcondi430 ] ; <i32> [#uses=2]
+ %p.0.s.i = add i32 %start_arg.pn.i, %t2561.i ; <i32> [#uses=1]
+ %t32.s.i = add i32 %p.0.s.i, -1 ; <i32> [#uses=1]
+ %t2i.i = getelementptr i8* %sarg60.pn.i, i32 %t32.s.i ; <i8*> [#uses=1]
+ br label %forbodyi.i
+
+forbodyi.i: ; preds = %forbodyi.i, %e.i441
+ %p.0i.i = phi i8* [ %t2i.i, %e.i441 ], [ %t7i.i, %forbodyi.i ] ; <i8*> [#uses=1]
+ %s2.0i.i = phi i8* [ getelementptr ([5 x i8]* @.str171, i32 0, i32 0), %e.i441 ], [ %t11i.i, %forbodyi.i ] ; <i8*> [#uses=2]
+ %t7i.i = getelementptr i8* %p.0i.i, i32 1 ; <i8*> [#uses=2]
+ %t8i.i = load i8* %t7i.i ; <i8> [#uses=1]
+ %t11i.i = getelementptr i8* %s2.0i.i, i32 1 ; <i8*> [#uses=1]
+ %t12i.i = load i8* %s2.0i.i ; <i8> [#uses=1]
+ %t14i.i = icmp eq i8 %t8i.i, %t12i.i ; <i1> [#uses=1]
+ br i1 %t14i.i, label %forbodyi.i, label %e40.i
+
+e40.i: ; preds = %forbodyi.i
+ %t49.i443 = add i32 %t2561.i, 1 ; <i32> [#uses=2]
+ %t51.i = sub i32 %extent.0.i, %t49.i443 ; <i32> [#uses=1]
+ br label %wcond.i422
+}
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index 530c782..bc05bb1 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep {__trampoline_setup}
+; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup}
module asm "\09.lazy_reference .objc_class_name_NSImageRep"
module asm "\09.objc_class_name_NSBitmapImageRep=0"
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index d211b3b..ef97912 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fmul | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 | grep fmul | count 2
+; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \
; RUN: grep fmul | count 1
define double @foo(double %X) {
diff --git a/test/CodeGen/PowerPC/vcmp-fold.ll b/test/CodeGen/PowerPC/vcmp-fold.ll
index 815bb0a..7a42c27 100644
--- a/test/CodeGen/PowerPC/vcmp-fold.ll
+++ b/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -1,6 +1,6 @@
; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single
; "vcmpbfp.".
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) {
diff --git a/test/CodeGen/PowerPC/vec_br_cmp.ll b/test/CodeGen/PowerPC/vec_br_cmp.ll
index 6d79967..c34d850 100644
--- a/test/CodeGen/PowerPC/vec_br_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
; RUN: grep vcmpeqfp. %t
; RUN: not grep mfcr %t
diff --git a/test/CodeGen/PowerPC/vec_call.ll b/test/CodeGen/PowerPC/vec_call.ll
index 8e7a08e..4511315 100644
--- a/test/CodeGen/PowerPC/vec_call.ll
+++ b/test/CodeGen/PowerPC/vec_call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) {
%C = add <4 x i32> %A, %B ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index c4b42b9e..32c6f48 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep CPI
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
%tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_fneg.ll b/test/CodeGen/PowerPC/vec_fneg.ll
index 9fdbffd..e01e659 100644
--- a/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/test/CodeGen/PowerPC/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsubfp
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubfp
define void @t(<4 x float>* %A) {
%tmp2 = load <4 x float>* %A
diff --git a/test/CodeGen/PowerPC/vec_insert.ll b/test/CodeGen/PowerPC/vec_insert.ll
index 04bbe65..185454c 100644
--- a/test/CodeGen/PowerPC/vec_insert.ll
+++ b/test/CodeGen/PowerPC/vec_insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep sth
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep sth
define <8 x i16> @insert(<8 x i16> %foo, i16 %a) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index 15376cae..d7ed64a5 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index b061fa9..80f4de4 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep mullw
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm
define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
%tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
index 5bb1b60..2c3594d 100644
--- a/test/CodeGen/PowerPC/vec_perf_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_shift.ll b/test/CodeGen/PowerPC/vec_shift.ll
index 0cc699c..646fb5f 100644
--- a/test/CodeGen/PowerPC/vec_shift.ll
+++ b/test/CodeGen/PowerPC/vec_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
; PR3628
define void @update(<4 x i32> %val, <4 x i32>* %dst) nounwind {
diff --git a/test/CodeGen/PowerPC/vec_shuffle.ll b/test/CodeGen/PowerPC/vec_shuffle.ll
index 1289dca..8270632 100644
--- a/test/CodeGen/PowerPC/vec_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -instcombine | \
+; RUN: opt < %s -instcombine | \
; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
; RUN: grep vsldoi %t | count 2
; RUN: grep vmrgh %t | count 7
; RUN: grep vmrgl %t | count 6
diff --git a/test/CodeGen/PowerPC/vec_splat.ll b/test/CodeGen/PowerPC/vec_splat.ll
index 7b7e4fe..6123728 100644
--- a/test/CodeGen/PowerPC/vec_splat.ll
+++ b/test/CodeGen/PowerPC/vec_splat.ll
@@ -1,7 +1,7 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g3 | \
; RUN: grep stfs | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
; RUN: grep vspltw %t | count 2
; RUN: grep vsplti %t | count 3
; RUN: grep vsplth %t | count 1
diff --git a/test/CodeGen/PowerPC/vec_vrsave.ll b/test/CodeGen/PowerPC/vec_vrsave.ll
index 06769f6..2a03d58 100644
--- a/test/CodeGen/PowerPC/vec_vrsave.ll
+++ b/test/CodeGen/PowerPC/vec_vrsave.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
; RUN: grep vrlw %t
; RUN: not grep spr %t
; RUN: not grep vrsave %t
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
index 7350e91..f862b2c 100644
--- a/test/CodeGen/PowerPC/vec_zero.ll
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vxor
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vxor
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
index aefd266..dfa2e35 100644
--- a/test/CodeGen/PowerPC/vector-identity-shuffle.ll
+++ b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep test:
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep test:
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
define void @test(<4 x float>* %tmp2.i) {
%tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index a6c17b4..ee4da31 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g3 > %t
%d8 = type <8 x double>
%f1 = type <1 x float>
diff --git a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
index 76f140c..082f9f4 100644
--- a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
+++ b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
define void @execute_list() {
%tmp.33.i = fdiv float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1]
diff --git a/test/CodeGen/SPARC/2007-05-09-JumpTables.ll b/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
index a014ace..41ad3b2 100644
--- a/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
+++ b/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; We cannot emit jump tables on Sparc, but we should correctly handle this case.
diff --git a/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll b/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
index d1ca44d..77c2002 100644
--- a/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
+++ b/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; PR1540
declare float @sinf(float)
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index f9f4c21..e8315f1 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; PR 1557
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
index aaa7bde..c12e9c1 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
; PR 1557
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/2009-08-28-PIC.ll b/test/CodeGen/SPARC/2009-08-28-PIC.ll
new file mode 100644
index 0000000..a2ba0d0
--- /dev/null
+++ b/test/CodeGen/SPARC/2009-08-28-PIC.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=sparc --relocation-model=pic < %s | grep _GLOBAL_OFFSET_TABLE_
+
+@foo = global i32 0 ; <i32*> [#uses=1]
+
+define i32 @func() nounwind readonly {
+entry:
+ %0 = load i32* @foo, align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
diff --git a/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll b/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
new file mode 100644
index 0000000..0167d32
--- /dev/null
+++ b/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
@@ -0,0 +1,6 @@
+; RUN: llc -march=sparc < %s | grep weak
+
+define weak i32 @func() nounwind {
+entry:
+ ret i32 0
+}
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 5c3e075..9c2c16a 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
define i32 @test(i32 %X) {
%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/SPARC/ctpop.ll b/test/CodeGen/SPARC/ctpop.ll
index d603baa..37d1c5a 100644
--- a/test/CodeGen/SPARC/ctpop.ll
+++ b/test/CodeGen/SPARC/ctpop.ll
@@ -1,9 +1,7 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=sparc -mattr=v9 -enable-sparc-v9-insts
-; RUN: llvm-as < %s | llc -march=sparc -mattr=-v9 | \
+; RUN: llc < %s -march=sparc -mattr=v9 -enable-sparc-v9-insts
+; RUN: llc < %s -march=sparc -mattr=-v9 | \
; RUN: not grep popc
-; RUN: llvm-as < %s | \
-; RUN: llc -march=sparc -mattr=v9 -enable-sparc-v9-insts | grep popc
+; RUN: llc < %s -march=sparc -mattr=v9 -enable-sparc-v9-insts | grep popc
declare i32 @llvm.ctpop.i32(i32)
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index a9850b7..8fa3e7e 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' linkage is used correctly.
;
-; RUN: llvm-as < %s | llc -march=sparc > %t
+; RUN: llc < %s -march=sparc > %t
; RUN: grep .foo: %t
; RUN: grep call.*\.foo %t
; RUN: grep .baz: %t
diff --git a/test/CodeGen/SPARC/xnor.ll b/test/CodeGen/SPARC/xnor.ll
index 9d8994c..6ff66bd 100644
--- a/test/CodeGen/SPARC/xnor.ll
+++ b/test/CodeGen/SPARC/xnor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc | \
+; RUN: llc < %s -march=sparc | \
; RUN: grep xnor | count 2
define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
new file mode 100644
index 0000000..de23795
--- /dev/null
+++ b/test/CodeGen/SystemZ/00-RetVoid.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+
+define void @foo() {
+entry:
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
new file mode 100644
index 0000000..9ab2097
--- /dev/null
+++ b/test/CodeGen/SystemZ/01-RetArg.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ ret i64 %b
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/01-RetImm.ll b/test/CodeGen/SystemZ/01-RetImm.ll
new file mode 100644
index 0000000..8b99e68
--- /dev/null
+++ b/test/CodeGen/SystemZ/01-RetImm.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=systemz | grep lghi | count 1
+; RUN: llc < %s -march=systemz | grep llill | count 1
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep llihl | count 1
+; RUN: llc < %s -march=systemz | grep llihh | count 1
+; RUN: llc < %s -march=systemz | grep lgfi | count 1
+; RUN: llc < %s -march=systemz | grep llilf | count 1
+; RUN: llc < %s -march=systemz | grep llihf | count 1
+
+
+define i64 @foo1() {
+entry:
+ ret i64 1
+}
+
+define i64 @foo2() {
+entry:
+ ret i64 65535
+}
+
+define i64 @foo3() {
+entry:
+ ret i64 131072
+}
+
+define i64 @foo4() {
+entry:
+ ret i64 8589934592
+}
+
+define i64 @foo5() {
+entry:
+ ret i64 562949953421312
+}
+
+define i64 @foo6() {
+entry:
+ ret i64 65537
+}
+
+define i64 @foo7() {
+entry:
+ ret i64 4294967295
+}
+
+define i64 @foo8() {
+entry:
+ ret i64 281483566645248
+}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
new file mode 100644
index 0000000..04022a0
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-MemArith.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=systemz | FileCheck %s
+
+define i32 @foo1(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo1:
+; CHECK: a %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = add i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo2(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo2:
+; CHECK: ay %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = add i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo3(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo3:
+; CHECK: ag %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = add i64 %a, %c
+ ret i64 %d
+}
+
+define i32 @foo4(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo4:
+; CHECK: n %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = and i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo5(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo5:
+; CHECK: ny %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = and i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo6(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo6:
+; CHECK: ng %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = and i64 %a, %c
+ ret i64 %d
+}
+
+define i32 @foo7(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo7:
+; CHECK: o %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = or i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo8(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo8:
+; CHECK: oy %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = or i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo9(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo9:
+; CHECK: og %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = or i64 %a, %c
+ ret i64 %d
+}
+
+define i32 @foo10(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo10:
+; CHECK: x %r2, 4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = xor i32 %a, %c
+ ret i32 %d
+}
+
+define i32 @foo11(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo11:
+; CHECK: xy %r2, -4(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
+ %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
+ %c = load i32* %ptr
+ %d = xor i32 %a, %c
+ ret i32 %d
+}
+
+define i64 @foo12(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo12:
+; CHECK: xg %r2, 8(%r1,%r3)
+entry:
+ %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
+ %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
+ %c = load i64* %ptr
+ %d = xor i64 %a, %c
+ ret i64 %d
+}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
new file mode 100644
index 0000000..9ff9b6a
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAdd.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = add i64 %a, %b
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
new file mode 100644
index 0000000..6d73e4d
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAddImm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = add i64 %a, 1
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
new file mode 100644
index 0000000..1492f9d
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAnd.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, %b
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAndImm.ll b/test/CodeGen/SystemZ/02-RetAndImm.ll
new file mode 100644
index 0000000..53c5e54
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAndImm.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 4
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep llihl | count 1
+; RUN: llc < %s -march=systemz | grep llihh | count 1
+
+define i64 @foo1(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 1
+ ret i64 %c
+}
+
+define i64 @foo2(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 131072
+ ret i64 %c
+}
+
+define i64 @foo3(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 8589934592
+ ret i64 %c
+}
+
+define i64 @foo4(i64 %a, i64 %b) {
+entry:
+ %c = and i64 %a, 562949953421312
+ ret i64 %c
+}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
new file mode 100644
index 0000000..7f3380dc
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetNeg.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz | grep lcgr | count 1
+
+define i64 @foo(i64 %a) {
+entry:
+ %c = sub i64 0, %a
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
new file mode 100644
index 0000000..1e8134d
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetOr.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, %b
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetOrImm.ll b/test/CodeGen/SystemZ/02-RetOrImm.ll
new file mode 100644
index 0000000..68cd24d
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetOrImm.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=systemz | grep oill | count 1
+; RUN: llc < %s -march=systemz | grep oilh | count 1
+; RUN: llc < %s -march=systemz | grep oihl | count 1
+; RUN: llc < %s -march=systemz | grep oihh | count 1
+
+define i64 @foo1(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 1
+ ret i64 %c
+}
+
+define i64 @foo2(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 131072
+ ret i64 %c
+}
+
+define i64 @foo3(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 8589934592
+ ret i64 %c
+}
+
+define i64 @foo4(i64 %a, i64 %b) {
+entry:
+ %c = or i64 %a, 562949953421312
+ ret i64 %c
+}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
new file mode 100644
index 0000000..1c4514f
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetSub.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
new file mode 100644
index 0000000..4f91cb0
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetSubImm.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = sub i64 %a, 1
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
new file mode 100644
index 0000000..a9439bf
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetXor.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = xor i64 %a, %b
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
new file mode 100644
index 0000000..ea4b829
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetXorImm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+ %c = xor i64 %a, 1
+ ret i64 %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
new file mode 100644
index 0000000..0a81271
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep ahi | count 3
+; RUN: llc < %s -march=systemz | grep afi | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 4
+; RUN: llc < %s -march=systemz | grep llgfr | count 2
+
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = add i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = add i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = add i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) zeroext {
+entry:
+ %c = add i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) signext {
+entry:
+ %c = add i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = add i32 %a, 131072
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
new file mode 100644
index 0000000..2787083
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=systemz | grep ar | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 2
+; RUN: llc < %s -march=systemz | grep llgfr | count 1
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
new file mode 100644
index 0000000..32673dd
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = and i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = and i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = and i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = and i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = and i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = and i32 %a, 131072
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
new file mode 100644
index 0000000..ed5e526
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = and i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = and i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = and i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
new file mode 100644
index 0000000..0c9bb14
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=systemz | grep lgr | count 2
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ ret i32 %b
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ ret i32 %b
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ ret i32 %b
+}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
new file mode 100644
index 0000000..343e30b
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep lghi | count 2
+; RUN: llc < %s -march=systemz | grep llill | count 1
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep lgfi | count 1
+; RUN: llc < %s -march=systemz | grep llilf | count 2
+
+
+define i32 @foo1() {
+entry:
+ ret i32 1
+}
+
+define i32 @foo2() {
+entry:
+ ret i32 65535
+}
+
+define i32 @foo3() {
+entry:
+ ret i32 131072
+}
+
+define i32 @foo4() {
+entry:
+ ret i32 65537
+}
+
+define i32 @foo5() {
+entry:
+ ret i32 4294967295
+}
+
+define i32 @foo6() zeroext {
+entry:
+ ret i32 4294967295
+}
+
+define i32 @foo7() signext {
+entry:
+ ret i32 4294967295
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
new file mode 100644
index 0000000..87ebcc1
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=systemz | grep lcr | count 1
+
+define i32 @foo(i32 %a) {
+entry:
+ %c = sub i32 0, %a
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
new file mode 100644
index 0000000..6d118b5e
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -march=systemz | grep oill | count 3
+; RUN: llc < %s -march=systemz | grep oilh | count 3
+; RUN: llc < %s -march=systemz | grep oilf | count 3
+; RUN: llc < %s -march=systemz | grep llgfr | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo7(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo8(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo9(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, 123456
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
new file mode 100644
index 0000000..4d7661a
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=systemz | grep ogr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
new file mode 100644
index 0000000..11ca796
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep ahi | count 3
+; RUN: llc < %s -march=systemz | grep afi | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 4
+; RUN: llc < %s -march=systemz | grep llgfr | count 2
+
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = sub i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = sub i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = sub i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = sub i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = sub i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = sub i32 %a, 131072
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
new file mode 100644
index 0000000..b3e1ac2
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=systemz | grep sr | count 3
+; RUN: llc < %s -march=systemz | grep llgfr | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 2
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
new file mode 100644
index 0000000..0033126
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=systemz | grep xilf | count 9
+; RUN: llc < %s -march=systemz | grep llgfr | count 3
+; RUN: llc < %s -march=systemz | grep lgfr | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo7(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo8(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, 123456
+ ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, 1
+ ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, 131072
+ ret i32 %c
+}
+
+define i32 @foo9(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, 123456
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
new file mode 100644
index 0000000..a9af231
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=systemz | grep xgr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+; RUN: llc < %s -march=systemz | grep lgfr | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+ %c = xor i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+ %c = xor i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+ %c = xor i32 %a, %b
+ ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/04-RetShifts.ll b/test/CodeGen/SystemZ/04-RetShifts.ll
new file mode 100644
index 0000000..cccdc47
--- /dev/null
+++ b/test/CodeGen/SystemZ/04-RetShifts.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -march=systemz | grep sra | count 6
+; RUN: llc < %s -march=systemz | grep srag | count 3
+; RUN: llc < %s -march=systemz | grep srl | count 6
+; RUN: llc < %s -march=systemz | grep srlg | count 3
+; RUN: llc < %s -march=systemz | grep sll | count 6
+; RUN: llc < %s -march=systemz | grep sllg | count 3
+
+define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %add = add i32 %idx, 1 ; <i32> [#uses=1]
+ %shr = ashr i32 %a, %add ; <i32> [#uses=1]
+ ret i32 %shr
+}
+
+define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %add = add i32 %idx, 1 ; <i32> [#uses=1]
+ %shr = shl i32 %a, %add ; <i32> [#uses=1]
+ ret i32 %shr
+}
+
+define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %add = add i32 %idx, 1 ; <i32> [#uses=1]
+ %shr = lshr i32 %a, %add ; <i32> [#uses=1]
+ ret i32 %shr
+}
+
+define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %add = add i64 %idx, 1 ; <i64> [#uses=1]
+ %shr = ashr i64 %a, %add ; <i64> [#uses=1]
+ ret i64 %shr
+}
+
+define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %add = add i64 %idx, 1 ; <i64> [#uses=1]
+ %shr = shl i64 %a, %add ; <i64> [#uses=1]
+ ret i64 %shr
+}
+
+define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %add = add i64 %idx, 1 ; <i64> [#uses=1]
+ %shr = lshr i64 %a, %add ; <i64> [#uses=1]
+ ret i64 %shr
+}
+
+define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = ashr i32 %a, 1
+ ret i32 %shr
+}
+
+define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = shl i32 %a, 1
+ ret i32 %shr
+}
+
+define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = lshr i32 %a, 1
+ ret i32 %shr
+}
+
+define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = ashr i32 %a, %idx
+ ret i32 %shr
+}
+
+define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = shl i32 %a, %idx
+ ret i32 %shr
+}
+
+define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone {
+entry:
+ %shr = lshr i32 %a, %idx
+ ret i32 %shr
+}
+
+define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = ashr i64 %a, 1
+ ret i64 %shr
+}
+
+define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = shl i64 %a, 1
+ ret i64 %shr
+}
+
+define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = lshr i64 %a, 1
+ ret i64 %shr
+}
+
+define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = ashr i64 %a, %idx
+ ret i64 %shr
+}
+
+define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = shl i64 %a, %idx
+ ret i64 %shr
+}
+
+define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone {
+entry:
+ %shr = lshr i64 %a, %idx
+ ret i64 %shr
+}
+
diff --git a/test/CodeGen/SystemZ/05-LoadAddr.ll b/test/CodeGen/SystemZ/05-LoadAddr.ll
new file mode 100644
index 0000000..cf02642
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-LoadAddr.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep lay | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64* @foo(i64* %a, i64 %idx) nounwind readnone {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ ret i64* %add.ptr2
+}
diff --git a/test/CodeGen/SystemZ/05-MemImmStores.ll b/test/CodeGen/SystemZ/05-MemImmStores.ll
new file mode 100644
index 0000000..3cf21cc
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemImmStores.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1
+; RUN: llc < %s -mattr=+z10 | grep mvhi | count 1
+; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1
+; RUN: llc < %s | grep mvi | count 2
+; RUN: llc < %s | grep mviy | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo1(i64* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i64* %a, i64 1 ; <i64*> [#uses=1]
+ store i64 1, i64* %add.ptr
+ ret void
+}
+
+define void @foo2(i32* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i32* %a, i64 1 ; <i32*> [#uses=1]
+ store i32 2, i32* %add.ptr
+ ret void
+}
+
+define void @foo3(i16* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %a, i64 1 ; <i16*> [#uses=1]
+ store i16 3, i16* %add.ptr
+ ret void
+}
+
+define void @foo4(i8* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i8* %a, i64 1 ; <i8*> [#uses=1]
+ store i8 4, i8* %add.ptr
+ ret void
+}
+
+define void @foo5(i8* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i8* %a, i64 -1 ; <i8*> [#uses=1]
+ store i8 4, i8* %add.ptr
+ ret void
+}
+
+define void @foo6(i16* nocapture %a, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %a, i64 -1 ; <i16*> [#uses=1]
+ store i16 3, i16* %add.ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
new file mode 100644
index 0000000..cf12063
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s | grep ly | count 2
+; RUN: llc < %s | grep sty | count 2
+; RUN: llc < %s | grep {l %} | count 2
+; RUN: llc < %s | grep {st %} | count 2
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
+entry:
+ %tmp1 = load i32* %foo ; <i32> [#uses=1]
+ store i32 %tmp1, i32* %bar
+ ret void
+}
+
+define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i32* %foo, i64 1 ; <i32*> [#uses=1]
+ %tmp1 = load i32* %add.ptr ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %tmp1, i32* %add.ptr5
+ ret void
+}
+
+define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %sub.ptr = getelementptr i32* %foo, i64 -1 ; <i32*> [#uses=1]
+ %tmp1 = load i32* %sub.ptr ; <i32> [#uses=1]
+ %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %tmp1, i32* %add.ptr
+ ret void
+}
+
+define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i32* %foo, i64 8192 ; <i32*> [#uses=1]
+ %tmp1 = load i32* %add.ptr ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %tmp1, i32* %add.ptr5
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
new file mode 100644
index 0000000..1e6232a
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s | grep {sthy.%} | count 2
+; RUN: llc < %s | grep {lhy.%} | count 2
+; RUN: llc < %s | grep {lh.%} | count 6
+; RUN: llc < %s | grep {sth.%} | count 2
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
+entry:
+ %tmp1 = load i16* %foo ; <i16> [#uses=1]
+ store i16 %tmp1, i16* %bar
+ ret void
+}
+
+define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1]
+ store i16 %tmp1, i16* %add.ptr5
+ ret void
+}
+
+define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1]
+ %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum ; <i16*> [#uses=1]
+ store i16 %tmp1, i16* %add.ptr
+ ret void
+}
+
+define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1]
+ store i16 %tmp1, i16* %add.ptr5
+ ret void
+}
+
+define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind {
+entry:
+ %tmp1 = load i16* %foo ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ store i32 %conv, i32* %bar
+ ret void
+}
+
+define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %conv, i32* %add.ptr5
+ ret void
+}
+
+define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %conv, i32* %add.ptr
+ ret void
+}
+
+define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+ %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1]
+ %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
+ %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
+ %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
+ store i32 %conv, i32* %add.ptr5
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemRegLoads.ll b/test/CodeGen/SystemZ/05-MemRegLoads.ll
new file mode 100644
index 0000000..f690a48
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemRegLoads.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=systemz | not grep aghi
+; RUN: llc < %s -march=systemz | grep llgf | count 1
+; RUN: llc < %s -march=systemz | grep llgh | count 1
+; RUN: llc < %s -march=systemz | grep llgc | count 1
+; RUN: llc < %s -march=systemz | grep lgf | count 2
+; RUN: llc < %s -march=systemz | grep lgh | count 2
+; RUN: llc < %s -march=systemz | grep lgb | count 1
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1]
+ ret i64 %tmp3
+}
+
+define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1]
+ ret i16 %tmp3
+}
+
+define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1]
+ ret i8 %tmp3
+}
+
+define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1]
+ ret i64 %tmp3
+}
+
+define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1]
+ ret i16 %tmp3
+}
+
+define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1]
+ ret i8 %tmp3
+}
diff --git a/test/CodeGen/SystemZ/05-MemRegStores.ll b/test/CodeGen/SystemZ/05-MemRegStores.ll
new file mode 100644
index 0000000..b851c3f
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemRegStores.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s | not grep aghi
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+
+; CHECK: foo1:
+; CHECK: stg %r4, 8(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ store i64 %val, i64* %add.ptr2
+ ret void
+}
+
+define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind {
+entry:
+; CHECK: foo2:
+; CHECK: st %r4, 4(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ store i32 %val, i32* %add.ptr2
+ ret void
+}
+
+define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind {
+entry:
+; CHECK: foo3:
+; CHECK: sth %r4, 2(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ store i16 %val, i16* %add.ptr2
+ ret void
+}
+
+define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind {
+entry:
+; CHECK: foo4:
+; CHECK: stc %r4, 1(%r3,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ store i8 %val, i8* %add.ptr2
+ ret void
+}
+
+define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo5:
+; CHECK: stc %r4, 1(%r3,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
+ %conv = trunc i64 %val to i8 ; <i8> [#uses=1]
+ store i8 %conv, i8* %add.ptr2
+ ret void
+}
+
+define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo6:
+; CHECK: sth %r4, 2(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
+ %conv = trunc i64 %val to i16 ; <i16> [#uses=1]
+ store i16 %conv, i16* %add.ptr2
+ ret void
+}
+
+define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo7:
+; CHECK: st %r4, 4(%r1,%r2)
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
+ %conv = trunc i64 %val to i32 ; <i32> [#uses=1]
+ store i32 %conv, i32* %add.ptr2
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/06-CallViaStack.ll b/test/CodeGen/SystemZ/06-CallViaStack.ll
new file mode 100644
index 0000000..e904f49
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-CallViaStack.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | grep 168 | count 1
+; RUN: llc < %s | grep 160 | count 3
+; RUN: llc < %s | grep 328 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind {
+entry:
+ %a = alloca i64, align 8 ; <i64*> [#uses=3]
+ store i64 %g, i64* %a
+ call void @bar(i64* %a) nounwind
+ %tmp1 = load i64* %a ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
+
+declare void @bar(i64*)
diff --git a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
new file mode 100644
index 0000000..c71da9b
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | grep 160 | count 1
+; RUN: llc < %s | grep 168 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
+entry:
+ ret i64 %f
+}
+
+define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
+entry:
+ %conv = ptrtoint i64* %g to i64 ; <i64> [#uses=1]
+ ret i64 %conv
+}
diff --git a/test/CodeGen/SystemZ/06-LocalFrame.ll b/test/CodeGen/SystemZ/06-LocalFrame.ll
new file mode 100644
index 0000000..d89b0df
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-LocalFrame.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s | grep 160 | count 1
+; RUN: llc < %s | grep 328 | count 1
+; RUN: llc < %s | grep 168 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone {
+entry:
+ %g = alloca i64, align 8 ; <i64*> [#uses=1]
+ %add.ptr = getelementptr i64* %g, i64 %f ; <i64*> [#uses=1]
+ ret i64* %add.ptr
+}
diff --git a/test/CodeGen/SystemZ/06-SimpleCall.ll b/test/CodeGen/SystemZ/06-SimpleCall.ll
new file mode 100644
index 0000000..fd4b5029
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-SimpleCall.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ tail call void @bar() nounwind
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/SystemZ/07-BrCond.ll b/test/CodeGen/SystemZ/07-BrCond.ll
new file mode 100644
index 0000000..8599717
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrCond.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s | grep je | count 1
+; RUN: llc < %s | grep jne | count 1
+; RUN: llc < %s | grep jhe | count 2
+; RUN: llc < %s | grep jle | count 2
+; RUN: llc < %s | grep jh | count 4
+; RUN: llc < %s | grep jl | count 4
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-BrCond32.ll b/test/CodeGen/SystemZ/07-BrCond32.ll
new file mode 100644
index 0000000..8ece5ac
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrCond32.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s | grep je | count 1
+; RUN: llc < %s | grep jne | count 1
+; RUN: llc < %s | grep jhe | count 2
+; RUN: llc < %s | grep jle | count 2
+; RUN: llc < %s | grep jh | count 4
+; RUN: llc < %s | grep jl | count 4
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
new file mode 100644
index 0000000..e0bc302
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrUnCond.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-linux"
+
+define void @foo() noreturn nounwind {
+entry:
+ tail call void @baz() nounwind
+ br label %l1
+
+l1: ; preds = %entry, %l1
+ tail call void @bar() nounwind
+ br label %l1
+}
+
+declare void @bar()
+
+declare void @baz()
diff --git a/test/CodeGen/SystemZ/07-CmpImm.ll b/test/CodeGen/SystemZ/07-CmpImm.ll
new file mode 100644
index 0000000..4d0ebda
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-CmpImm.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s | grep cgfi | count 8
+; RUN: llc < %s | grep clgfi | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i64 %a, i64 %b) nounwind {
+entry:
+ %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i64 %a) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i64 %a) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i64 %a) nounwind {
+entry:
+ %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i64 %a) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i64 %a) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i64 %a) nounwind {
+entry:
+ %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i64 %a) nounwind {
+entry:
+ %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-CmpImm32.ll b/test/CodeGen/SystemZ/07-CmpImm32.ll
new file mode 100644
index 0000000..add34fa
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-CmpImm32.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s | grep jl | count 3
+; RUN: llc < %s | grep jh | count 3
+; RUN: llc < %s | grep je | count 2
+; RUN: llc < %s | grep jne | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+declare void @bar()
+
+define void @foo1(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo2(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo3(i32 %a) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo4(i32 %a) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo5(i32 %a) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo6(i32 %a) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo7(i32 %a) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo8(i32 %a) nounwind {
+entry:
+ %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
+define void @foo9(i32 %a) nounwind {
+entry:
+ %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/07-SelectCC.ll b/test/CodeGen/SystemZ/07-SelectCC.ll
new file mode 100644
index 0000000..aa4b36e
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-SelectCC.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep clgr
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
+ %cond = select i1 %cmp, i64 %a, i64 %b ; <i64> [#uses=1]
+ ret i64 %cond
+}
diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll
new file mode 100644
index 0000000..ff1e441
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-DivRem.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s | grep dsgr | count 2
+; RUN: llc < %s | grep dsgfr | count 2
+; RUN: llc < %s | grep dlr | count 2
+; RUN: llc < %s | grep dlgr | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @div(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %div = sdiv i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %div
+}
+
+define i32 @div1(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %div = sdiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %div
+}
+
+define i64 @div2(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %div = udiv i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %div
+}
+
+define i32 @div3(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %div = udiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %div
+}
+
+define i64 @rem(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %rem = srem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %rem
+}
+
+define i32 @rem1(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %rem = srem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %rem
+}
+
+define i64 @rem2(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %rem = urem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %rem
+}
+
+define i32 @rem3(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %rem = urem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %rem
+}
diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
new file mode 100644
index 0000000..d6ec0e7
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s | grep {dsgf.%} | count 2
+; RUN: llc < %s | grep {dsg.%} | count 2
+; RUN: llc < %s | grep {dl.%} | count 2
+; RUN: llc < %s | grep dlg | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @div(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = sdiv i64 %a, %b1
+ ret i64 %div
+}
+
+define i64 @div1(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = udiv i64 %a, %b1
+ ret i64 %div
+}
+
+define i64 @rem(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = srem i64 %a, %b1
+ ret i64 %div
+}
+
+define i64 @rem1(i64 %a, i64* %b) nounwind readnone {
+entry:
+ %b1 = load i64* %b
+ %div = urem i64 %a, %b1
+ ret i64 %div
+}
+
+define i32 @div2(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = sdiv i32 %a, %b1
+ ret i32 %div
+}
+
+define i32 @div3(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = udiv i32 %a, %b1
+ ret i32 %div
+}
+
+define i32 @rem2(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = srem i32 %a, %b1
+ ret i32 %div
+}
+
+define i32 @rem3(i32 %a, i32* %b) nounwind readnone {
+entry:
+ %b1 = load i32* %b
+ %div = urem i32 %a, %b1
+ ret i32 %div
+}
+
diff --git a/test/CodeGen/SystemZ/08-SimpleMuls.ll b/test/CodeGen/SystemZ/08-SimpleMuls.ll
new file mode 100644
index 0000000..1ab88d6
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-SimpleMuls.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | grep msgr | count 2
+; RUN: llc < %s | grep msr | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %mul = mul i64 %b, %a ; <i64> [#uses=1]
+ ret i64 %mul
+}
+
+define i64 @foo2(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %mul = mul i64 %b, %a ; <i64> [#uses=1]
+ ret i64 %mul
+}
+
+define i32 @foo3(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %mul = mul i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %mul
+}
+
+define i32 @foo4(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %mul = mul i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %mul
+}
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
new file mode 100644
index 0000000..27189ab
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo(i64 %N) nounwind {
+entry:
+ %N3 = trunc i64 %N to i32 ; <i32> [#uses=1]
+ %vla = alloca i8, i32 %N3, align 2 ; <i8*> [#uses=1]
+ call void @bar(i8* %vla) nounwind
+ ret void
+}
+
+declare void @bar(i8*)
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
new file mode 100644
index 0000000..6e0c1ab
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-Globals.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | grep larl | count 3
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-linux"
+@bar = common global i64 0, align 8 ; <i64*> [#uses=3]
+
+define i64 @foo() nounwind readonly {
+entry:
+ %tmp = load i64* @bar ; <i64> [#uses=1]
+ ret i64 %tmp
+}
+
+define i64* @foo2() nounwind readnone {
+entry:
+ ret i64* @bar
+}
+
+define i64* @foo3(i64 %idx) nounwind readnone {
+entry:
+ %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
+ %add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum ; <i64*> [#uses=1]
+ ret i64* %add.ptr2
+}
diff --git a/test/CodeGen/SystemZ/09-Switches.ll b/test/CodeGen/SystemZ/09-Switches.ll
new file mode 100644
index 0000000..32aaa62
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-Switches.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=systemz | grep larl
+
+define i32 @main(i32 %tmp158) {
+entry:
+ switch i32 %tmp158, label %bb336 [
+ i32 -2147483648, label %bb338
+ i32 -2147483647, label %bb338
+ i32 -2147483646, label %bb338
+ i32 120, label %bb338
+ i32 121, label %bb339
+ i32 122, label %bb340
+ i32 123, label %bb341
+ i32 124, label %bb342
+ i32 125, label %bb343
+ i32 126, label %bb336
+ i32 1024, label %bb338
+ i32 0, label %bb338
+ i32 1, label %bb338
+ i32 2, label %bb338
+ i32 3, label %bb338
+ i32 4, label %bb338
+ i32 5, label %bb338
+ ]
+bb336:
+ ret i32 10
+bb338:
+ ret i32 11
+bb339:
+ ret i32 12
+bb340:
+ ret i32 13
+bb341:
+ ret i32 14
+bb342:
+ ret i32 15
+bb343:
+ ret i32 18
+
+}
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
new file mode 100644
index 0000000..cc32538
--- /dev/null
+++ b/test/CodeGen/SystemZ/10-FuncsPic.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3
+; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+@ptr = external global void (...)* ; <void (...)**> [#uses=2]
+
+define void @foo1() nounwind {
+entry:
+ store void (...)* @func, void (...)** @ptr
+ ret void
+}
+
+declare void @func(...)
+
+define void @foo2() nounwind {
+entry:
+ tail call void (...)* @func() nounwind
+ ret void
+}
+
+define void @foo3() nounwind {
+entry:
+ %tmp = load void (...)** @ptr ; <void (...)*> [#uses=1]
+ tail call void (...)* %tmp() nounwind
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
new file mode 100644
index 0000000..a77671e
--- /dev/null
+++ b/test/CodeGen/SystemZ/10-GlobalsPic.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+@src = external global i32 ; <i32*> [#uses=2]
+@dst = external global i32 ; <i32*> [#uses=2]
+@ptr = external global i32* ; <i32**> [#uses=2]
+
+define void @foo1() nounwind {
+entry:
+ %tmp = load i32* @src ; <i32> [#uses=1]
+ store i32 %tmp, i32* @dst
+ ret void
+}
+
+define void @foo2() nounwind {
+entry:
+ store i32* @dst, i32** @ptr
+ ret void
+}
+
+define void @foo3() nounwind {
+entry:
+ %tmp = load i32* @src ; <i32> [#uses=1]
+ %tmp1 = load i32** @ptr ; <i32*> [#uses=1]
+ %arrayidx = getelementptr i32* %tmp1, i64 1 ; <i32*> [#uses=1]
+ store i32 %tmp, i32* %arrayidx
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
new file mode 100644
index 0000000..609d9dc
--- /dev/null
+++ b/test/CodeGen/SystemZ/11-BSwap.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+
+define i16 @foo(i16 zeroext %a) zeroext {
+ %res = tail call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %res
+}
+
+define i32 @foo2(i32 zeroext %a) zeroext {
+; CHECK: foo2:
+; CHECK: lrvr %r1, %r2
+ %res = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %res
+}
+
+define i64 @foo3(i64 %a) zeroext {
+; CHECK: foo3:
+; CHECK: lrvgr %r2, %r2
+ %res = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %res
+}
+
+define i16 @foo4(i16* %b) zeroext {
+ %a = load i16* %b
+ %res = tail call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %res
+}
+
+define i32 @foo5(i32* %b) zeroext {
+; CHECK: foo5:
+; CHECK: lrv %r1, 0(%r2)
+ %a = load i32* %b
+ %res = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %res
+}
+
+define i64 @foo6(i64* %b) {
+; CHECK: foo6:
+; CHECK: lrvg %r2, 0(%r2)
+ %a = load i64* %b
+ %res = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %res
+}
+
+define void @foo7(i16 %a, i16* %b) {
+ %res = tail call i16 @llvm.bswap.i16(i16 %a)
+ store i16 %res, i16* %b
+ ret void
+}
+
+define void @foo8(i32 %a, i32* %b) {
+; CHECK: foo8:
+; CHECK: strv %r2, 0(%r3)
+ %res = tail call i32 @llvm.bswap.i32(i32 %a)
+ store i32 %res, i32* %b
+ ret void
+}
+
+define void @foo9(i64 %a, i64* %b) {
+; CHECK: foo9:
+; CHECK: strvg %r2, 0(%r3)
+ %res = tail call i64 @llvm.bswap.i64(i64 %a)
+ store i64 %res, i64* %b
+ ret void
+}
+
+declare i16 @llvm.bswap.i16(i16) nounwind readnone
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
new file mode 100644
index 0000000..65f8e14
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i32 @main() nounwind {
+entry:
+ %call = call i32 (...)* @random() nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @random(...)
diff --git a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
new file mode 100644
index 0000000..3cfa97d
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=systemz | grep nilf | count 1
+; RUN: llc < %s -march=systemz | grep nill | count 1
+
+define i32 @gnu_dev_major(i64 %__dev) nounwind readnone {
+entry:
+ %shr = lshr i64 %__dev, 8 ; <i64> [#uses=1]
+ %shr8 = trunc i64 %shr to i32 ; <i32> [#uses=1]
+ %shr2 = lshr i64 %__dev, 32 ; <i64> [#uses=1]
+ %conv = trunc i64 %shr2 to i32 ; <i32> [#uses=1]
+ %and3 = and i32 %conv, -4096 ; <i32> [#uses=1]
+ %and6 = and i32 %shr8, 4095 ; <i32> [#uses=1]
+ %conv5 = or i32 %and6, %and3 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
new file mode 100644
index 0000000..3317864
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=systemz | grep rll
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %shl = shl i32 %x, 0 ; <i32> [#uses=1]
+ %sub = sub i32 32, 0 ; <i32> [#uses=1]
+ %shr = lshr i32 %x, %sub ; <i32> [#uses=1]
+ %or = or i32 %shr, %shl ; <i32> [#uses=1]
+ ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
new file mode 100644
index 0000000..5f6ec50d
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-ibm-linux"
+ %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+ %struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }>
+
+define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind {
+entry:
+ %cmp17.i = icmp slt i32 undef, %startpos ; <i1> [#uses=1]
+ %or.cond.i = or i1 undef, %cmp17.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i
+
+if.then20.i: ; preds = %entry
+ ret i32 -2
+
+byte_re_search_2.exit: ; preds = %entry
+ ret i32 -1
+}
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
new file mode 100644
index 0000000..99d0ee7
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
+entry:
+ br label %for.body38
+
+for.body38: ; preds = %for.body38, %entry
+ br i1 undef, label %for.cond220, label %for.body38
+
+for.cond220: ; preds = %for.cond220, %for.body38
+ br i1 false, label %for.cond220, label %for.end297
+
+for.end297: ; preds = %for.cond220
+ %tmp334 = load i8* undef ; <i8> [#uses=1]
+ %conv343 = zext i8 %tmp334 to i32 ; <i32> [#uses=1]
+ %sub344 = add i32 %conv343, -1 ; <i32> [#uses=1]
+ %shl345 = shl i32 1, %sub344 ; <i32> [#uses=1]
+ %conv346 = sext i32 %shl345 to i64 ; <i64> [#uses=1]
+ br label %for.body356
+
+for.body356: ; preds = %for.body356, %for.end297
+ %mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ] ; <i64> [#uses=0]
+ br label %for.body356
+}
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
new file mode 100644
index 0000000..a35167f
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
+entry:
+ br i1 undef, label %for.body, label %return
+
+for.body: ; preds = %entry
+ %add = add i32 0, %col ; <i32> [#uses=1]
+ %sh_prom = zext i32 %add to i64 ; <i64> [#uses=1]
+ %shl = shl i64 1, %sh_prom ; <i64> [#uses=1]
+ br i1 undef, label %if.then13, label %if.else
+
+if.then13: ; preds = %for.body
+ ret i32 0
+
+if.else: ; preds = %for.body
+ %or34 = or i64 undef, %shl ; <i64> [#uses=0]
+ ret i32 0
+
+return: ; preds = %entry
+ ret i32 1
+}
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
new file mode 100644
index 0000000..6a76a8e
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep 168
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
+
+declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
+
+define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
+newFuncRoot:
+ br label %bb3
+
+bb4.exitStub: ; preds = %bb3
+ store double %call, double* %call.out
+ ret void
+
+bb3: ; preds = %newFuncRoot
+ tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind
+ %call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft) ; <double> [#uses=1]
+ br label %bb4.exitStub
+}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
new file mode 100644
index 0000000..564d343
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define float @foo(i32 signext %a) {
+entry:
+ %b = bitcast i32 %a to float
+ ret float %b
+}
+
+define i32 @bar(float %a) {
+entry:
+ %b = bitcast float %a to i32
+ ret i32 %b
+}
\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
new file mode 100644
index 0000000..a91e29e
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define signext i32 @dfg_parse() nounwind {
+entry:
+ br i1 undef, label %if.then2208, label %if.else2360
+
+if.then2208: ; preds = %entry
+ br i1 undef, label %bb.nph3189, label %for.end2270
+
+bb.nph3189: ; preds = %if.then2208
+ unreachable
+
+for.end2270: ; preds = %if.then2208
+ %call2279 = call i64 @strlen(i8* undef) nounwind ; <i64> [#uses=1]
+ %add2281 = add i64 0, %call2279 ; <i64> [#uses=1]
+ %tmp2283 = trunc i64 %add2281 to i32 ; <i32> [#uses=1]
+ %tmp2284 = alloca i8, i32 %tmp2283, align 2 ; <i8*> [#uses=1]
+ %yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13 ; <i8*> [#uses=1]
+ store i8 117, i8* %yyd.0.i2561.13
+ br label %while.cond.i2558
+
+while.cond.i2558: ; preds = %while.cond.i2558, %for.end2270
+ br label %while.cond.i2558
+
+if.else2360: ; preds = %entry
+ unreachable
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
new file mode 100644
index 0000000..f7686f1
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
+target triple = "s390x-ibm-linux-gnu"
+
+@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1]
+
+define internal void @frame_dummy() nounwind {
+entry:
+ %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2]
+ %0 = icmp eq void (i8*)* %asmtmp, null ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb3
+
+bb3: ; preds = %entry
+ tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
new file mode 100644
index 0000000..fde7d9d
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
+target triple = "s390x-ibm-linux-gnu"
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+; CHECK: cpsdr %f0, %f2, %f0
+ %0 = tail call double @copysign(double %a, double %b) nounwind readnone
+ ret double %0
+}
+
+define float @bar(float %a, float %b) nounwind {
+entry:
+; CHECK: cpsdr %f0, %f2, %f0
+ %0 = tail call float @copysignf(float %a, float %b) nounwind readnone
+ ret float %0
+}
+
+
+declare double @copysign(double, double) nounwind readnone
+declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/SystemZ/dg.exp b/test/CodeGen/SystemZ/dg.exp
new file mode 100644
index 0000000..e9624ba
--- /dev/null
+++ b/test/CodeGen/SystemZ/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target SystemZ] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 19c156d..1e61b23 100644
--- a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin
+; RUN: llc < %s -mtriple=thumb-apple-darwin
%struct.rtx_def = type { i8 }
@str = external global [7 x i8]
diff --git a/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
index ee52cf0..be2b839 100644
--- a/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
+++ b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin
+; RUN: llc < %s -mtriple=thumb-apple-darwin
%struct.color_sample = type { i32 }
%struct.ref = type { %struct.color_sample, i16, i16 }
diff --git a/test/CodeGen/Thumb/2007-03-06-AddR7.ll b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
index ad3e195..8d139e9 100644
--- a/test/CodeGen/Thumb/2007-03-06-AddR7.ll
+++ b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6,+vfp2 | not grep {add r., r7, #2 \\* 4}
%struct.__fooAllocator = type opaque
diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 159be4e..2074bfd 100644
--- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep r11
+; RUN: llc < %s | not grep r11
target triple = "thumb-linux-gnueabi"
%struct.__sched_param = type { i32 }
diff --git a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
index 9b2aba9..5c883b3 100644
--- a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
+++ b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep r0 | count 1
+; RUN: llc < %s -march=thumb | grep r0 | count 1
define i32 @a(i32 %x, i32 %y) nounwind readnone {
entry:
diff --git a/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
new file mode 100644
index 0000000..471a82f
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv6-elf | not grep "subs sp"
+; PR4567
+
+define arm_apcscc i8* @__gets_chk(i8* %s, i32 %slen) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ ret i8* undef
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=0]
+ br label %bb4
+
+bb3: ; preds = %bb1
+ %1 = malloc i8, i32 undef ; <i8*> [#uses=0]
+ br label %bb4
+
+bb4: ; preds = %bb3, %bb2
+ br i1 undef, label %bb5, label %bb6
+
+bb5: ; preds = %bb4
+ %2 = call arm_apcscc i8* @gets(i8* %s) nounwind ; <i8*> [#uses=1]
+ ret i8* %2
+
+bb6: ; preds = %bb4
+ unreachable
+}
+
+declare arm_apcscc i8* @gets(i8*) nounwind
diff --git a/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
new file mode 100644
index 0000000..6e035d0
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10
+
+@Time.2535 = external global i64 ; <i64*> [#uses=2]
+
+define arm_apcscc i64 @millisecs() nounwind {
+entry:
+ %0 = load i64* @Time.2535, align 4 ; <i64> [#uses=2]
+ %1 = add i64 %0, 1 ; <i64> [#uses=1]
+ store i64 %1, i64* @Time.2535, align 4
+ ret i64 %0
+}
diff --git a/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll b/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
new file mode 100644
index 0000000..f195348
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim
+
+ %struct.LinkList = type { i32, %struct.LinkList* }
+ %struct.List = type { i32, i32* }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ %ll = alloca %struct.LinkList*, align 4 ; <%struct.LinkList**> [#uses=1]
+ %0 = call arm_apcscc i32 @ReadList(%struct.LinkList** %ll, %struct.List** null) nounwind ; <i32> [#uses=1]
+ switch i32 %0, label %bb5 [
+ i32 7, label %bb4
+ i32 42, label %bb3
+ ]
+
+bb3: ; preds = %entry
+ ret i32 1
+
+bb4: ; preds = %entry
+ ret i32 0
+
+bb5: ; preds = %entry
+ ret i32 1
+}
+
+declare arm_apcscc i32 @ReadList(%struct.LinkList** nocapture, %struct.List** nocapture) nounwind
diff --git a/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
new file mode 100644
index 0000000..ef4b5ce
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
@@ -0,0 +1,737 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+
+ %struct.BF_KEY = type { [18 x i32], [1024 x i32] }
+
+define arm_apcscc void @BF_encrypt(i32* nocapture %data, %struct.BF_KEY* nocapture %key, i32 %encrypt) nounwind {
+entry:
+ %0 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2]
+ %1 = load i32* %data, align 4 ; <i32> [#uses=2]
+ %2 = load i32* undef, align 4 ; <i32> [#uses=2]
+ br i1 undef, label %bb1, label %bb
+
+bb: ; preds = %entry
+ %3 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %4 = xor i32 %3, %1 ; <i32> [#uses=4]
+ %5 = load i32* null, align 4 ; <i32> [#uses=1]
+ %6 = lshr i32 %4, 24 ; <i32> [#uses=1]
+ %7 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1]
+ %8 = load i32* %7, align 4 ; <i32> [#uses=1]
+ %9 = lshr i32 %4, 16 ; <i32> [#uses=1]
+ %10 = or i32 %9, 256 ; <i32> [#uses=1]
+ %11 = and i32 %10, 511 ; <i32> [#uses=1]
+ %12 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1]
+ %13 = load i32* %12, align 4 ; <i32> [#uses=1]
+ %14 = add i32 %13, %8 ; <i32> [#uses=1]
+ %15 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1]
+ %16 = load i32* %15, align 4 ; <i32> [#uses=1]
+ %17 = xor i32 %14, %16 ; <i32> [#uses=1]
+ %18 = or i32 %4, 768 ; <i32> [#uses=1]
+ %19 = and i32 %18, 1023 ; <i32> [#uses=1]
+ %20 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1]
+ %21 = load i32* %20, align 4 ; <i32> [#uses=1]
+ %22 = add i32 %17, %21 ; <i32> [#uses=1]
+ %23 = xor i32 %5, %2 ; <i32> [#uses=1]
+ %24 = xor i32 %23, %22 ; <i32> [#uses=5]
+ %25 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1]
+ %26 = load i32* %25, align 4 ; <i32> [#uses=1]
+ %27 = lshr i32 %24, 24 ; <i32> [#uses=1]
+ %28 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1]
+ %29 = load i32* %28, align 4 ; <i32> [#uses=1]
+ %30 = lshr i32 %24, 16 ; <i32> [#uses=1]
+ %31 = or i32 %30, 256 ; <i32> [#uses=1]
+ %32 = and i32 %31, 511 ; <i32> [#uses=1]
+ %33 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1]
+ %34 = load i32* %33, align 4 ; <i32> [#uses=1]
+ %35 = add i32 %34, %29 ; <i32> [#uses=1]
+ %36 = lshr i32 %24, 8 ; <i32> [#uses=1]
+ %37 = or i32 %36, 512 ; <i32> [#uses=1]
+ %38 = and i32 %37, 767 ; <i32> [#uses=1]
+ %39 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1]
+ %40 = load i32* %39, align 4 ; <i32> [#uses=1]
+ %41 = xor i32 %35, %40 ; <i32> [#uses=1]
+ %42 = or i32 %24, 768 ; <i32> [#uses=1]
+ %43 = and i32 %42, 1023 ; <i32> [#uses=1]
+ %44 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1]
+ %45 = load i32* %44, align 4 ; <i32> [#uses=1]
+ %46 = add i32 %41, %45 ; <i32> [#uses=1]
+ %47 = xor i32 %26, %4 ; <i32> [#uses=1]
+ %48 = xor i32 %47, %46 ; <i32> [#uses=5]
+ %49 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+ %50 = load i32* %49, align 4 ; <i32> [#uses=1]
+ %51 = lshr i32 %48, 24 ; <i32> [#uses=1]
+ %52 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1]
+ %53 = load i32* %52, align 4 ; <i32> [#uses=1]
+ %54 = lshr i32 %48, 16 ; <i32> [#uses=1]
+ %55 = or i32 %54, 256 ; <i32> [#uses=1]
+ %56 = and i32 %55, 511 ; <i32> [#uses=1]
+ %57 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1]
+ %58 = load i32* %57, align 4 ; <i32> [#uses=1]
+ %59 = add i32 %58, %53 ; <i32> [#uses=1]
+ %60 = lshr i32 %48, 8 ; <i32> [#uses=1]
+ %61 = or i32 %60, 512 ; <i32> [#uses=1]
+ %62 = and i32 %61, 767 ; <i32> [#uses=1]
+ %63 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1]
+ %64 = load i32* %63, align 4 ; <i32> [#uses=1]
+ %65 = xor i32 %59, %64 ; <i32> [#uses=1]
+ %66 = or i32 %48, 768 ; <i32> [#uses=1]
+ %67 = and i32 %66, 1023 ; <i32> [#uses=1]
+ %68 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1]
+ %69 = load i32* %68, align 4 ; <i32> [#uses=1]
+ %70 = add i32 %65, %69 ; <i32> [#uses=1]
+ %71 = xor i32 %50, %24 ; <i32> [#uses=1]
+ %72 = xor i32 %71, %70 ; <i32> [#uses=5]
+ %73 = load i32* null, align 4 ; <i32> [#uses=1]
+ %74 = lshr i32 %72, 24 ; <i32> [#uses=1]
+ %75 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1]
+ %76 = load i32* %75, align 4 ; <i32> [#uses=1]
+ %77 = lshr i32 %72, 16 ; <i32> [#uses=1]
+ %78 = or i32 %77, 256 ; <i32> [#uses=1]
+ %79 = and i32 %78, 511 ; <i32> [#uses=1]
+ %80 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1]
+ %81 = load i32* %80, align 4 ; <i32> [#uses=1]
+ %82 = add i32 %81, %76 ; <i32> [#uses=1]
+ %83 = lshr i32 %72, 8 ; <i32> [#uses=1]
+ %84 = or i32 %83, 512 ; <i32> [#uses=1]
+ %85 = and i32 %84, 767 ; <i32> [#uses=1]
+ %86 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1]
+ %87 = load i32* %86, align 4 ; <i32> [#uses=1]
+ %88 = xor i32 %82, %87 ; <i32> [#uses=1]
+ %89 = or i32 %72, 768 ; <i32> [#uses=1]
+ %90 = and i32 %89, 1023 ; <i32> [#uses=1]
+ %91 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1]
+ %92 = load i32* %91, align 4 ; <i32> [#uses=1]
+ %93 = add i32 %88, %92 ; <i32> [#uses=1]
+ %94 = xor i32 %73, %48 ; <i32> [#uses=1]
+ %95 = xor i32 %94, %93 ; <i32> [#uses=5]
+ %96 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %97 = lshr i32 %95, 24 ; <i32> [#uses=1]
+ %98 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1]
+ %99 = load i32* %98, align 4 ; <i32> [#uses=1]
+ %100 = lshr i32 %95, 16 ; <i32> [#uses=1]
+ %101 = or i32 %100, 256 ; <i32> [#uses=1]
+ %102 = and i32 %101, 511 ; <i32> [#uses=1]
+ %103 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1]
+ %104 = load i32* %103, align 4 ; <i32> [#uses=1]
+ %105 = add i32 %104, %99 ; <i32> [#uses=1]
+ %106 = lshr i32 %95, 8 ; <i32> [#uses=1]
+ %107 = or i32 %106, 512 ; <i32> [#uses=1]
+ %108 = and i32 %107, 767 ; <i32> [#uses=1]
+ %109 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1]
+ %110 = load i32* %109, align 4 ; <i32> [#uses=1]
+ %111 = xor i32 %105, %110 ; <i32> [#uses=1]
+ %112 = or i32 %95, 768 ; <i32> [#uses=1]
+ %113 = and i32 %112, 1023 ; <i32> [#uses=1]
+ %114 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1]
+ %115 = load i32* %114, align 4 ; <i32> [#uses=1]
+ %116 = add i32 %111, %115 ; <i32> [#uses=1]
+ %117 = xor i32 %96, %72 ; <i32> [#uses=1]
+ %118 = xor i32 %117, %116 ; <i32> [#uses=5]
+ %119 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+ %120 = load i32* %119, align 4 ; <i32> [#uses=1]
+ %121 = lshr i32 %118, 24 ; <i32> [#uses=1]
+ %122 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1]
+ %123 = load i32* %122, align 4 ; <i32> [#uses=1]
+ %124 = lshr i32 %118, 16 ; <i32> [#uses=1]
+ %125 = or i32 %124, 256 ; <i32> [#uses=1]
+ %126 = and i32 %125, 511 ; <i32> [#uses=1]
+ %127 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1]
+ %128 = load i32* %127, align 4 ; <i32> [#uses=1]
+ %129 = add i32 %128, %123 ; <i32> [#uses=1]
+ %130 = lshr i32 %118, 8 ; <i32> [#uses=1]
+ %131 = or i32 %130, 512 ; <i32> [#uses=1]
+ %132 = and i32 %131, 767 ; <i32> [#uses=1]
+ %133 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1]
+ %134 = load i32* %133, align 4 ; <i32> [#uses=1]
+ %135 = xor i32 %129, %134 ; <i32> [#uses=1]
+ %136 = or i32 %118, 768 ; <i32> [#uses=1]
+ %137 = and i32 %136, 1023 ; <i32> [#uses=1]
+ %138 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1]
+ %139 = load i32* %138, align 4 ; <i32> [#uses=1]
+ %140 = add i32 %135, %139 ; <i32> [#uses=1]
+ %141 = xor i32 %120, %95 ; <i32> [#uses=1]
+ %142 = xor i32 %141, %140 ; <i32> [#uses=5]
+ %143 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1]
+ %144 = load i32* %143, align 4 ; <i32> [#uses=1]
+ %145 = lshr i32 %142, 24 ; <i32> [#uses=1]
+ %146 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1]
+ %147 = load i32* %146, align 4 ; <i32> [#uses=1]
+ %148 = lshr i32 %142, 16 ; <i32> [#uses=1]
+ %149 = or i32 %148, 256 ; <i32> [#uses=1]
+ %150 = and i32 %149, 511 ; <i32> [#uses=1]
+ %151 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1]
+ %152 = load i32* %151, align 4 ; <i32> [#uses=1]
+ %153 = add i32 %152, %147 ; <i32> [#uses=1]
+ %154 = lshr i32 %142, 8 ; <i32> [#uses=1]
+ %155 = or i32 %154, 512 ; <i32> [#uses=1]
+ %156 = and i32 %155, 767 ; <i32> [#uses=1]
+ %157 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1]
+ %158 = load i32* %157, align 4 ; <i32> [#uses=1]
+ %159 = xor i32 %153, %158 ; <i32> [#uses=1]
+ %160 = or i32 %142, 768 ; <i32> [#uses=1]
+ %161 = and i32 %160, 1023 ; <i32> [#uses=1]
+ %162 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1]
+ %163 = load i32* %162, align 4 ; <i32> [#uses=1]
+ %164 = add i32 %159, %163 ; <i32> [#uses=1]
+ %165 = xor i32 %144, %118 ; <i32> [#uses=1]
+ %166 = xor i32 %165, %164 ; <i32> [#uses=5]
+ %167 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %168 = lshr i32 %166, 24 ; <i32> [#uses=1]
+ %169 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1]
+ %170 = load i32* %169, align 4 ; <i32> [#uses=1]
+ %171 = lshr i32 %166, 16 ; <i32> [#uses=1]
+ %172 = or i32 %171, 256 ; <i32> [#uses=1]
+ %173 = and i32 %172, 511 ; <i32> [#uses=1]
+ %174 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1]
+ %175 = load i32* %174, align 4 ; <i32> [#uses=1]
+ %176 = add i32 %175, %170 ; <i32> [#uses=1]
+ %177 = lshr i32 %166, 8 ; <i32> [#uses=1]
+ %178 = or i32 %177, 512 ; <i32> [#uses=1]
+ %179 = and i32 %178, 767 ; <i32> [#uses=1]
+ %180 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1]
+ %181 = load i32* %180, align 4 ; <i32> [#uses=1]
+ %182 = xor i32 %176, %181 ; <i32> [#uses=1]
+ %183 = or i32 %166, 768 ; <i32> [#uses=1]
+ %184 = and i32 %183, 1023 ; <i32> [#uses=1]
+ %185 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1]
+ %186 = load i32* %185, align 4 ; <i32> [#uses=1]
+ %187 = add i32 %182, %186 ; <i32> [#uses=1]
+ %188 = xor i32 %167, %142 ; <i32> [#uses=1]
+ %189 = xor i32 %188, %187 ; <i32> [#uses=5]
+ %190 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+ %191 = load i32* %190, align 4 ; <i32> [#uses=1]
+ %192 = lshr i32 %189, 24 ; <i32> [#uses=1]
+ %193 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1]
+ %194 = load i32* %193, align 4 ; <i32> [#uses=1]
+ %195 = lshr i32 %189, 16 ; <i32> [#uses=1]
+ %196 = or i32 %195, 256 ; <i32> [#uses=1]
+ %197 = and i32 %196, 511 ; <i32> [#uses=1]
+ %198 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1]
+ %199 = load i32* %198, align 4 ; <i32> [#uses=1]
+ %200 = add i32 %199, %194 ; <i32> [#uses=1]
+ %201 = lshr i32 %189, 8 ; <i32> [#uses=1]
+ %202 = or i32 %201, 512 ; <i32> [#uses=1]
+ %203 = and i32 %202, 767 ; <i32> [#uses=1]
+ %204 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1]
+ %205 = load i32* %204, align 4 ; <i32> [#uses=1]
+ %206 = xor i32 %200, %205 ; <i32> [#uses=1]
+ %207 = or i32 %189, 768 ; <i32> [#uses=1]
+ %208 = and i32 %207, 1023 ; <i32> [#uses=1]
+ %209 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1]
+ %210 = load i32* %209, align 4 ; <i32> [#uses=1]
+ %211 = add i32 %206, %210 ; <i32> [#uses=1]
+ %212 = xor i32 %191, %166 ; <i32> [#uses=1]
+ %213 = xor i32 %212, %211 ; <i32> [#uses=5]
+ %214 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+ %215 = load i32* %214, align 4 ; <i32> [#uses=1]
+ %216 = lshr i32 %213, 24 ; <i32> [#uses=1]
+ %217 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1]
+ %218 = load i32* %217, align 4 ; <i32> [#uses=1]
+ %219 = lshr i32 %213, 16 ; <i32> [#uses=1]
+ %220 = or i32 %219, 256 ; <i32> [#uses=1]
+ %221 = and i32 %220, 511 ; <i32> [#uses=1]
+ %222 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1]
+ %223 = load i32* %222, align 4 ; <i32> [#uses=1]
+ %224 = add i32 %223, %218 ; <i32> [#uses=1]
+ %225 = lshr i32 %213, 8 ; <i32> [#uses=1]
+ %226 = or i32 %225, 512 ; <i32> [#uses=1]
+ %227 = and i32 %226, 767 ; <i32> [#uses=1]
+ %228 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1]
+ %229 = load i32* %228, align 4 ; <i32> [#uses=1]
+ %230 = xor i32 %224, %229 ; <i32> [#uses=1]
+ %231 = or i32 %213, 768 ; <i32> [#uses=1]
+ %232 = and i32 %231, 1023 ; <i32> [#uses=1]
+ %233 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1]
+ %234 = load i32* %233, align 4 ; <i32> [#uses=1]
+ %235 = add i32 %230, %234 ; <i32> [#uses=1]
+ %236 = xor i32 %215, %189 ; <i32> [#uses=1]
+ %237 = xor i32 %236, %235 ; <i32> [#uses=5]
+ %238 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1]
+ %239 = load i32* %238, align 4 ; <i32> [#uses=1]
+ %240 = lshr i32 %237, 24 ; <i32> [#uses=1]
+ %241 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1]
+ %242 = load i32* %241, align 4 ; <i32> [#uses=1]
+ %243 = lshr i32 %237, 16 ; <i32> [#uses=1]
+ %244 = or i32 %243, 256 ; <i32> [#uses=1]
+ %245 = and i32 %244, 511 ; <i32> [#uses=1]
+ %246 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1]
+ %247 = load i32* %246, align 4 ; <i32> [#uses=1]
+ %248 = add i32 %247, %242 ; <i32> [#uses=1]
+ %249 = lshr i32 %237, 8 ; <i32> [#uses=1]
+ %250 = or i32 %249, 512 ; <i32> [#uses=1]
+ %251 = and i32 %250, 767 ; <i32> [#uses=1]
+ %252 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1]
+ %253 = load i32* %252, align 4 ; <i32> [#uses=1]
+ %254 = xor i32 %248, %253 ; <i32> [#uses=1]
+ %255 = or i32 %237, 768 ; <i32> [#uses=1]
+ %256 = and i32 %255, 1023 ; <i32> [#uses=1]
+ %257 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1]
+ %258 = load i32* %257, align 4 ; <i32> [#uses=1]
+ %259 = add i32 %254, %258 ; <i32> [#uses=1]
+ %260 = xor i32 %239, %213 ; <i32> [#uses=1]
+ %261 = xor i32 %260, %259 ; <i32> [#uses=5]
+ %262 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %263 = lshr i32 %261, 24 ; <i32> [#uses=1]
+ %264 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1]
+ %265 = load i32* %264, align 4 ; <i32> [#uses=1]
+ %266 = lshr i32 %261, 16 ; <i32> [#uses=1]
+ %267 = or i32 %266, 256 ; <i32> [#uses=1]
+ %268 = and i32 %267, 511 ; <i32> [#uses=1]
+ %269 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1]
+ %270 = load i32* %269, align 4 ; <i32> [#uses=1]
+ %271 = add i32 %270, %265 ; <i32> [#uses=1]
+ %272 = lshr i32 %261, 8 ; <i32> [#uses=1]
+ %273 = or i32 %272, 512 ; <i32> [#uses=1]
+ %274 = and i32 %273, 767 ; <i32> [#uses=1]
+ %275 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1]
+ %276 = load i32* %275, align 4 ; <i32> [#uses=1]
+ %277 = xor i32 %271, %276 ; <i32> [#uses=1]
+ %278 = or i32 %261, 768 ; <i32> [#uses=1]
+ %279 = and i32 %278, 1023 ; <i32> [#uses=1]
+ %280 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1]
+ %281 = load i32* %280, align 4 ; <i32> [#uses=1]
+ %282 = add i32 %277, %281 ; <i32> [#uses=1]
+ %283 = xor i32 %262, %237 ; <i32> [#uses=1]
+ %284 = xor i32 %283, %282 ; <i32> [#uses=4]
+ %285 = load i32* null, align 4 ; <i32> [#uses=1]
+ %286 = lshr i32 %284, 24 ; <i32> [#uses=1]
+ %287 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1]
+ %288 = load i32* %287, align 4 ; <i32> [#uses=1]
+ %289 = lshr i32 %284, 16 ; <i32> [#uses=1]
+ %290 = or i32 %289, 256 ; <i32> [#uses=1]
+ %291 = and i32 %290, 511 ; <i32> [#uses=1]
+ %292 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %291; <i32*> [#uses=1]
+ %293 = load i32* %292, align 4 ; <i32> [#uses=1]
+ %294 = add i32 %293, %288 ; <i32> [#uses=1]
+ %295 = lshr i32 %284, 8 ; <i32> [#uses=1]
+ %296 = or i32 %295, 512 ; <i32> [#uses=1]
+ %297 = and i32 %296, 767 ; <i32> [#uses=1]
+ %298 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1]
+ %299 = load i32* %298, align 4 ; <i32> [#uses=1]
+ %300 = xor i32 %294, %299 ; <i32> [#uses=1]
+ %301 = or i32 %284, 768 ; <i32> [#uses=1]
+ %302 = and i32 %301, 1023 ; <i32> [#uses=1]
+ %303 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1]
+ %304 = load i32* %303, align 4 ; <i32> [#uses=1]
+ %305 = add i32 %300, %304 ; <i32> [#uses=1]
+ %306 = xor i32 %285, %261 ; <i32> [#uses=1]
+ %307 = xor i32 %306, %305 ; <i32> [#uses=1]
+ %308 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+ %309 = load i32* %308, align 4 ; <i32> [#uses=1]
+ %310 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1]
+ %311 = load i32* %310, align 4 ; <i32> [#uses=1]
+ %312 = or i32 0, 256 ; <i32> [#uses=1]
+ %313 = and i32 %312, 511 ; <i32> [#uses=1]
+ %314 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1]
+ %315 = load i32* %314, align 4 ; <i32> [#uses=1]
+ %316 = add i32 %315, %311 ; <i32> [#uses=1]
+ %317 = or i32 0, 512 ; <i32> [#uses=1]
+ %318 = and i32 %317, 767 ; <i32> [#uses=1]
+ %319 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1]
+ %320 = load i32* %319, align 4 ; <i32> [#uses=1]
+ %321 = xor i32 %316, %320 ; <i32> [#uses=1]
+ %322 = or i32 0, 768 ; <i32> [#uses=1]
+ %323 = and i32 %322, 1023 ; <i32> [#uses=1]
+ %324 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1]
+ %325 = load i32* %324, align 4 ; <i32> [#uses=1]
+ %326 = add i32 %321, %325 ; <i32> [#uses=1]
+ %327 = xor i32 %309, %307 ; <i32> [#uses=1]
+ %328 = xor i32 %327, %326 ; <i32> [#uses=5]
+ %329 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 17; <i32*> [#uses=1]
+ br label %bb2
+
+bb1: ; preds = %entry
+ %330 = load i32* null, align 4 ; <i32> [#uses=1]
+ %331 = xor i32 %330, %1 ; <i32> [#uses=4]
+ %332 = load i32* null, align 4 ; <i32> [#uses=1]
+ %333 = lshr i32 %331, 24 ; <i32> [#uses=1]
+ %334 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1]
+ %335 = load i32* %334, align 4 ; <i32> [#uses=1]
+ %336 = load i32* null, align 4 ; <i32> [#uses=1]
+ %337 = add i32 %336, %335 ; <i32> [#uses=1]
+ %338 = lshr i32 %331, 8 ; <i32> [#uses=1]
+ %339 = or i32 %338, 512 ; <i32> [#uses=1]
+ %340 = and i32 %339, 767 ; <i32> [#uses=1]
+ %341 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1]
+ %342 = load i32* %341, align 4 ; <i32> [#uses=1]
+ %343 = xor i32 %337, %342 ; <i32> [#uses=1]
+ %344 = or i32 %331, 768 ; <i32> [#uses=1]
+ %345 = and i32 %344, 1023 ; <i32> [#uses=1]
+ %346 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1]
+ %347 = load i32* %346, align 4 ; <i32> [#uses=1]
+ %348 = add i32 %343, %347 ; <i32> [#uses=1]
+ %349 = xor i32 %332, %2 ; <i32> [#uses=1]
+ %350 = xor i32 %349, %348 ; <i32> [#uses=5]
+ %351 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+ %352 = load i32* %351, align 4 ; <i32> [#uses=1]
+ %353 = lshr i32 %350, 24 ; <i32> [#uses=1]
+ %354 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1]
+ %355 = load i32* %354, align 4 ; <i32> [#uses=1]
+ %356 = lshr i32 %350, 16 ; <i32> [#uses=1]
+ %357 = or i32 %356, 256 ; <i32> [#uses=1]
+ %358 = and i32 %357, 511 ; <i32> [#uses=1]
+ %359 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1]
+ %360 = load i32* %359, align 4 ; <i32> [#uses=1]
+ %361 = add i32 %360, %355 ; <i32> [#uses=1]
+ %362 = lshr i32 %350, 8 ; <i32> [#uses=1]
+ %363 = or i32 %362, 512 ; <i32> [#uses=1]
+ %364 = and i32 %363, 767 ; <i32> [#uses=1]
+ %365 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1]
+ %366 = load i32* %365, align 4 ; <i32> [#uses=1]
+ %367 = xor i32 %361, %366 ; <i32> [#uses=1]
+ %368 = or i32 %350, 768 ; <i32> [#uses=1]
+ %369 = and i32 %368, 1023 ; <i32> [#uses=1]
+ %370 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1]
+ %371 = load i32* %370, align 4 ; <i32> [#uses=1]
+ %372 = add i32 %367, %371 ; <i32> [#uses=1]
+ %373 = xor i32 %352, %331 ; <i32> [#uses=1]
+ %374 = xor i32 %373, %372 ; <i32> [#uses=5]
+ %375 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1]
+ %376 = load i32* %375, align 4 ; <i32> [#uses=1]
+ %377 = lshr i32 %374, 24 ; <i32> [#uses=1]
+ %378 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1]
+ %379 = load i32* %378, align 4 ; <i32> [#uses=1]
+ %380 = lshr i32 %374, 16 ; <i32> [#uses=1]
+ %381 = or i32 %380, 256 ; <i32> [#uses=1]
+ %382 = and i32 %381, 511 ; <i32> [#uses=1]
+ %383 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1]
+ %384 = load i32* %383, align 4 ; <i32> [#uses=1]
+ %385 = add i32 %384, %379 ; <i32> [#uses=1]
+ %386 = lshr i32 %374, 8 ; <i32> [#uses=1]
+ %387 = or i32 %386, 512 ; <i32> [#uses=1]
+ %388 = and i32 %387, 767 ; <i32> [#uses=1]
+ %389 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1]
+ %390 = load i32* %389, align 4 ; <i32> [#uses=1]
+ %391 = xor i32 %385, %390 ; <i32> [#uses=1]
+ %392 = or i32 %374, 768 ; <i32> [#uses=1]
+ %393 = and i32 %392, 1023 ; <i32> [#uses=1]
+ %394 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1]
+ %395 = load i32* %394, align 4 ; <i32> [#uses=1]
+ %396 = add i32 %391, %395 ; <i32> [#uses=1]
+ %397 = xor i32 %376, %350 ; <i32> [#uses=1]
+ %398 = xor i32 %397, %396 ; <i32> [#uses=5]
+ %399 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1]
+ %400 = load i32* %399, align 4 ; <i32> [#uses=1]
+ %401 = lshr i32 %398, 24 ; <i32> [#uses=1]
+ %402 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1]
+ %403 = load i32* %402, align 4 ; <i32> [#uses=1]
+ %404 = lshr i32 %398, 16 ; <i32> [#uses=1]
+ %405 = or i32 %404, 256 ; <i32> [#uses=1]
+ %406 = and i32 %405, 511 ; <i32> [#uses=1]
+ %407 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1]
+ %408 = load i32* %407, align 4 ; <i32> [#uses=1]
+ %409 = add i32 %408, %403 ; <i32> [#uses=1]
+ %410 = lshr i32 %398, 8 ; <i32> [#uses=1]
+ %411 = or i32 %410, 512 ; <i32> [#uses=1]
+ %412 = and i32 %411, 767 ; <i32> [#uses=1]
+ %413 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1]
+ %414 = load i32* %413, align 4 ; <i32> [#uses=1]
+ %415 = xor i32 %409, %414 ; <i32> [#uses=1]
+ %416 = or i32 %398, 768 ; <i32> [#uses=1]
+ %417 = and i32 %416, 1023 ; <i32> [#uses=1]
+ %418 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1]
+ %419 = load i32* %418, align 4 ; <i32> [#uses=1]
+ %420 = add i32 %415, %419 ; <i32> [#uses=1]
+ %421 = xor i32 %400, %374 ; <i32> [#uses=1]
+ %422 = xor i32 %421, %420 ; <i32> [#uses=5]
+ %423 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1]
+ %424 = load i32* %423, align 4 ; <i32> [#uses=1]
+ %425 = lshr i32 %422, 24 ; <i32> [#uses=1]
+ %426 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1]
+ %427 = load i32* %426, align 4 ; <i32> [#uses=1]
+ %428 = lshr i32 %422, 16 ; <i32> [#uses=1]
+ %429 = or i32 %428, 256 ; <i32> [#uses=1]
+ %430 = and i32 %429, 511 ; <i32> [#uses=1]
+ %431 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1]
+ %432 = load i32* %431, align 4 ; <i32> [#uses=1]
+ %433 = add i32 %432, %427 ; <i32> [#uses=1]
+ %434 = lshr i32 %422, 8 ; <i32> [#uses=1]
+ %435 = or i32 %434, 512 ; <i32> [#uses=1]
+ %436 = and i32 %435, 767 ; <i32> [#uses=1]
+ %437 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %436; <i32*> [#uses=1]
+ %438 = load i32* %437, align 4 ; <i32> [#uses=1]
+ %439 = xor i32 %433, %438 ; <i32> [#uses=1]
+ %440 = or i32 %422, 768 ; <i32> [#uses=1]
+ %441 = and i32 %440, 1023 ; <i32> [#uses=1]
+ %442 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1]
+ %443 = load i32* %442, align 4 ; <i32> [#uses=1]
+ %444 = add i32 %439, %443 ; <i32> [#uses=1]
+ %445 = xor i32 %424, %398 ; <i32> [#uses=1]
+ %446 = xor i32 %445, %444 ; <i32> [#uses=5]
+ %447 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %448 = lshr i32 %446, 24 ; <i32> [#uses=1]
+ %449 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1]
+ %450 = load i32* %449, align 4 ; <i32> [#uses=1]
+ %451 = lshr i32 %446, 16 ; <i32> [#uses=1]
+ %452 = or i32 %451, 256 ; <i32> [#uses=1]
+ %453 = and i32 %452, 511 ; <i32> [#uses=1]
+ %454 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1]
+ %455 = load i32* %454, align 4 ; <i32> [#uses=1]
+ %456 = add i32 %455, %450 ; <i32> [#uses=1]
+ %457 = lshr i32 %446, 8 ; <i32> [#uses=1]
+ %458 = or i32 %457, 512 ; <i32> [#uses=1]
+ %459 = and i32 %458, 767 ; <i32> [#uses=1]
+ %460 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1]
+ %461 = load i32* %460, align 4 ; <i32> [#uses=1]
+ %462 = xor i32 %456, %461 ; <i32> [#uses=1]
+ %463 = or i32 %446, 768 ; <i32> [#uses=1]
+ %464 = and i32 %463, 1023 ; <i32> [#uses=1]
+ %465 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1]
+ %466 = load i32* %465, align 4 ; <i32> [#uses=1]
+ %467 = add i32 %462, %466 ; <i32> [#uses=1]
+ %468 = xor i32 %447, %422 ; <i32> [#uses=1]
+ %469 = xor i32 %468, %467 ; <i32> [#uses=5]
+ %470 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+ %471 = load i32* %470, align 4 ; <i32> [#uses=1]
+ %472 = lshr i32 %469, 24 ; <i32> [#uses=1]
+ %473 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1]
+ %474 = load i32* %473, align 4 ; <i32> [#uses=1]
+ %475 = lshr i32 %469, 16 ; <i32> [#uses=1]
+ %476 = or i32 %475, 256 ; <i32> [#uses=1]
+ %477 = and i32 %476, 511 ; <i32> [#uses=1]
+ %478 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1]
+ %479 = load i32* %478, align 4 ; <i32> [#uses=1]
+ %480 = add i32 %479, %474 ; <i32> [#uses=1]
+ %481 = lshr i32 %469, 8 ; <i32> [#uses=1]
+ %482 = or i32 %481, 512 ; <i32> [#uses=1]
+ %483 = and i32 %482, 767 ; <i32> [#uses=1]
+ %484 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1]
+ %485 = load i32* %484, align 4 ; <i32> [#uses=1]
+ %486 = xor i32 %480, %485 ; <i32> [#uses=1]
+ %487 = or i32 %469, 768 ; <i32> [#uses=1]
+ %488 = and i32 %487, 1023 ; <i32> [#uses=1]
+ %489 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1]
+ %490 = load i32* %489, align 4 ; <i32> [#uses=1]
+ %491 = add i32 %486, %490 ; <i32> [#uses=1]
+ %492 = xor i32 %471, %446 ; <i32> [#uses=1]
+ %493 = xor i32 %492, %491 ; <i32> [#uses=5]
+ %494 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+ %495 = load i32* %494, align 4 ; <i32> [#uses=1]
+ %496 = lshr i32 %493, 24 ; <i32> [#uses=1]
+ %497 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1]
+ %498 = load i32* %497, align 4 ; <i32> [#uses=1]
+ %499 = lshr i32 %493, 16 ; <i32> [#uses=1]
+ %500 = or i32 %499, 256 ; <i32> [#uses=1]
+ %501 = and i32 %500, 511 ; <i32> [#uses=1]
+ %502 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1]
+ %503 = load i32* %502, align 4 ; <i32> [#uses=1]
+ %504 = add i32 %503, %498 ; <i32> [#uses=1]
+ %505 = lshr i32 %493, 8 ; <i32> [#uses=1]
+ %506 = or i32 %505, 512 ; <i32> [#uses=1]
+ %507 = and i32 %506, 767 ; <i32> [#uses=1]
+ %508 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1]
+ %509 = load i32* %508, align 4 ; <i32> [#uses=1]
+ %510 = xor i32 %504, %509 ; <i32> [#uses=1]
+ %511 = or i32 %493, 768 ; <i32> [#uses=1]
+ %512 = and i32 %511, 1023 ; <i32> [#uses=1]
+ %513 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1]
+ %514 = load i32* %513, align 4 ; <i32> [#uses=1]
+ %515 = add i32 %510, %514 ; <i32> [#uses=1]
+ %516 = xor i32 %495, %469 ; <i32> [#uses=1]
+ %517 = xor i32 %516, %515 ; <i32> [#uses=5]
+ %518 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1]
+ %519 = load i32* %518, align 4 ; <i32> [#uses=1]
+ %520 = lshr i32 %517, 24 ; <i32> [#uses=1]
+ %521 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1]
+ %522 = load i32* %521, align 4 ; <i32> [#uses=1]
+ %523 = lshr i32 %517, 16 ; <i32> [#uses=1]
+ %524 = or i32 %523, 256 ; <i32> [#uses=1]
+ %525 = and i32 %524, 511 ; <i32> [#uses=1]
+ %526 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1]
+ %527 = load i32* %526, align 4 ; <i32> [#uses=1]
+ %528 = add i32 %527, %522 ; <i32> [#uses=1]
+ %529 = lshr i32 %517, 8 ; <i32> [#uses=1]
+ %530 = or i32 %529, 512 ; <i32> [#uses=1]
+ %531 = and i32 %530, 767 ; <i32> [#uses=1]
+ %532 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1]
+ %533 = load i32* %532, align 4 ; <i32> [#uses=1]
+ %534 = xor i32 %528, %533 ; <i32> [#uses=1]
+ %535 = or i32 %517, 768 ; <i32> [#uses=1]
+ %536 = and i32 %535, 1023 ; <i32> [#uses=1]
+ %537 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1]
+ %538 = load i32* %537, align 4 ; <i32> [#uses=1]
+ %539 = add i32 %534, %538 ; <i32> [#uses=1]
+ %540 = xor i32 %519, %493 ; <i32> [#uses=1]
+ %541 = xor i32 %540, %539 ; <i32> [#uses=5]
+ %542 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %543 = lshr i32 %541, 24 ; <i32> [#uses=1]
+ %544 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1]
+ %545 = load i32* %544, align 4 ; <i32> [#uses=1]
+ %546 = lshr i32 %541, 16 ; <i32> [#uses=1]
+ %547 = or i32 %546, 256 ; <i32> [#uses=1]
+ %548 = and i32 %547, 511 ; <i32> [#uses=1]
+ %549 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1]
+ %550 = load i32* %549, align 4 ; <i32> [#uses=1]
+ %551 = add i32 %550, %545 ; <i32> [#uses=1]
+ %552 = lshr i32 %541, 8 ; <i32> [#uses=1]
+ %553 = or i32 %552, 512 ; <i32> [#uses=1]
+ %554 = and i32 %553, 767 ; <i32> [#uses=1]
+ %555 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1]
+ %556 = load i32* %555, align 4 ; <i32> [#uses=1]
+ %557 = xor i32 %551, %556 ; <i32> [#uses=1]
+ %558 = or i32 %541, 768 ; <i32> [#uses=1]
+ %559 = and i32 %558, 1023 ; <i32> [#uses=1]
+ %560 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1]
+ %561 = load i32* %560, align 4 ; <i32> [#uses=1]
+ %562 = add i32 %557, %561 ; <i32> [#uses=1]
+ %563 = xor i32 %542, %517 ; <i32> [#uses=1]
+ %564 = xor i32 %563, %562 ; <i32> [#uses=5]
+ %565 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+ %566 = load i32* %565, align 4 ; <i32> [#uses=1]
+ %567 = lshr i32 %564, 24 ; <i32> [#uses=1]
+ %568 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1]
+ %569 = load i32* %568, align 4 ; <i32> [#uses=1]
+ %570 = lshr i32 %564, 16 ; <i32> [#uses=1]
+ %571 = or i32 %570, 256 ; <i32> [#uses=1]
+ %572 = and i32 %571, 511 ; <i32> [#uses=1]
+ %573 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1]
+ %574 = load i32* %573, align 4 ; <i32> [#uses=1]
+ %575 = add i32 %574, %569 ; <i32> [#uses=1]
+ %576 = lshr i32 %564, 8 ; <i32> [#uses=1]
+ %577 = or i32 %576, 512 ; <i32> [#uses=1]
+ %578 = and i32 %577, 767 ; <i32> [#uses=1]
+ %579 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1]
+ %580 = load i32* %579, align 4 ; <i32> [#uses=1]
+ %581 = xor i32 %575, %580 ; <i32> [#uses=1]
+ %582 = or i32 %564, 768 ; <i32> [#uses=1]
+ %583 = and i32 %582, 1023 ; <i32> [#uses=1]
+ %584 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %583; <i32*> [#uses=1]
+ %585 = load i32* %584, align 4 ; <i32> [#uses=1]
+ %586 = add i32 %581, %585 ; <i32> [#uses=1]
+ %587 = xor i32 %566, %541 ; <i32> [#uses=1]
+ %588 = xor i32 %587, %586 ; <i32> [#uses=5]
+ %589 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1]
+ %590 = load i32* %589, align 4 ; <i32> [#uses=1]
+ %591 = lshr i32 %588, 24 ; <i32> [#uses=1]
+ %592 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1]
+ %593 = load i32* %592, align 4 ; <i32> [#uses=1]
+ %594 = lshr i32 %588, 16 ; <i32> [#uses=1]
+ %595 = or i32 %594, 256 ; <i32> [#uses=1]
+ %596 = and i32 %595, 511 ; <i32> [#uses=1]
+ %597 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1]
+ %598 = load i32* %597, align 4 ; <i32> [#uses=1]
+ %599 = add i32 %598, %593 ; <i32> [#uses=1]
+ %600 = lshr i32 %588, 8 ; <i32> [#uses=1]
+ %601 = or i32 %600, 512 ; <i32> [#uses=1]
+ %602 = and i32 %601, 767 ; <i32> [#uses=1]
+ %603 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1]
+ %604 = load i32* %603, align 4 ; <i32> [#uses=1]
+ %605 = xor i32 %599, %604 ; <i32> [#uses=1]
+ %606 = or i32 %588, 768 ; <i32> [#uses=1]
+ %607 = and i32 %606, 1023 ; <i32> [#uses=1]
+ %608 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1]
+ %609 = load i32* %608, align 4 ; <i32> [#uses=1]
+ %610 = add i32 %605, %609 ; <i32> [#uses=1]
+ %611 = xor i32 %590, %564 ; <i32> [#uses=1]
+ %612 = xor i32 %611, %610 ; <i32> [#uses=5]
+ %613 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1]
+ %614 = load i32* %613, align 4 ; <i32> [#uses=1]
+ %615 = lshr i32 %612, 24 ; <i32> [#uses=1]
+ %616 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1]
+ %617 = load i32* %616, align 4 ; <i32> [#uses=1]
+ %618 = lshr i32 %612, 16 ; <i32> [#uses=1]
+ %619 = or i32 %618, 256 ; <i32> [#uses=1]
+ %620 = and i32 %619, 511 ; <i32> [#uses=1]
+ %621 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1]
+ %622 = load i32* %621, align 4 ; <i32> [#uses=1]
+ %623 = add i32 %622, %617 ; <i32> [#uses=1]
+ %624 = lshr i32 %612, 8 ; <i32> [#uses=1]
+ %625 = or i32 %624, 512 ; <i32> [#uses=1]
+ %626 = and i32 %625, 767 ; <i32> [#uses=1]
+ %627 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1]
+ %628 = load i32* %627, align 4 ; <i32> [#uses=1]
+ %629 = xor i32 %623, %628 ; <i32> [#uses=1]
+ %630 = or i32 %612, 768 ; <i32> [#uses=1]
+ %631 = and i32 %630, 1023 ; <i32> [#uses=1]
+ %632 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1]
+ %633 = load i32* %632, align 4 ; <i32> [#uses=1]
+ %634 = add i32 %629, %633 ; <i32> [#uses=1]
+ %635 = xor i32 %614, %588 ; <i32> [#uses=1]
+ %636 = xor i32 %635, %634 ; <i32> [#uses=5]
+ %637 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+ %638 = load i32* %637, align 4 ; <i32> [#uses=1]
+ %639 = lshr i32 %636, 24 ; <i32> [#uses=1]
+ %640 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1]
+ %641 = load i32* %640, align 4 ; <i32> [#uses=1]
+ %642 = lshr i32 %636, 16 ; <i32> [#uses=1]
+ %643 = or i32 %642, 256 ; <i32> [#uses=1]
+ %644 = and i32 %643, 511 ; <i32> [#uses=1]
+ %645 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1]
+ %646 = load i32* %645, align 4 ; <i32> [#uses=1]
+ %647 = add i32 %646, %641 ; <i32> [#uses=1]
+ %648 = lshr i32 %636, 8 ; <i32> [#uses=1]
+ %649 = or i32 %648, 512 ; <i32> [#uses=1]
+ %650 = and i32 %649, 767 ; <i32> [#uses=1]
+ %651 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1]
+ %652 = load i32* %651, align 4 ; <i32> [#uses=1]
+ %653 = xor i32 %647, %652 ; <i32> [#uses=1]
+ %654 = or i32 %636, 768 ; <i32> [#uses=1]
+ %655 = and i32 %654, 1023 ; <i32> [#uses=1]
+ %656 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1]
+ %657 = load i32* %656, align 4 ; <i32> [#uses=1]
+ %658 = add i32 %653, %657 ; <i32> [#uses=1]
+ %659 = xor i32 %638, %612 ; <i32> [#uses=1]
+ %660 = xor i32 %659, %658 ; <i32> [#uses=5]
+ %661 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %662 = lshr i32 %660, 24 ; <i32> [#uses=1]
+ %663 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1]
+ %664 = load i32* %663, align 4 ; <i32> [#uses=1]
+ %665 = lshr i32 %660, 16 ; <i32> [#uses=1]
+ %666 = or i32 %665, 256 ; <i32> [#uses=1]
+ %667 = and i32 %666, 511 ; <i32> [#uses=1]
+ %668 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1]
+ %669 = load i32* %668, align 4 ; <i32> [#uses=1]
+ %670 = add i32 %669, %664 ; <i32> [#uses=1]
+ %671 = lshr i32 %660, 8 ; <i32> [#uses=1]
+ %672 = or i32 %671, 512 ; <i32> [#uses=1]
+ %673 = and i32 %672, 767 ; <i32> [#uses=1]
+ %674 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1]
+ %675 = load i32* %674, align 4 ; <i32> [#uses=1]
+ %676 = xor i32 %670, %675 ; <i32> [#uses=1]
+ %677 = or i32 %660, 768 ; <i32> [#uses=1]
+ %678 = and i32 %677, 1023 ; <i32> [#uses=1]
+ %679 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1]
+ %680 = load i32* %679, align 4 ; <i32> [#uses=1]
+ %681 = add i32 %676, %680 ; <i32> [#uses=1]
+ %682 = xor i32 %661, %636 ; <i32> [#uses=1]
+ %683 = xor i32 %682, %681 ; <i32> [#uses=5]
+ %684 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 1; <i32*> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb1, %bb
+ %.pn2.in = phi i32* [ %329, %bb ], [ %0, %bb1 ]; <i32*> [#uses=1]
+ %.pn3 = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn15.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn14.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn13.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn10.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+ %.pn4.in = phi i32* [ null, %bb ], [ %684, %bb1 ]; <i32*> [#uses=1]
+ %.pn5 = phi i32 [ 0, %bb ], [ %660, %bb1 ]; <i32> [#uses=1]
+ %.pn14.in.in = lshr i32 %.pn14.in.in.in, 16; <i32> [#uses=1]
+ %.pn14.in = or i32 %.pn14.in.in, 256 ; <i32> [#uses=1]
+ %.pn13.in.in = lshr i32 %.pn13.in.in.in, 8; <i32> [#uses=1]
+ %.pn15 = lshr i32 %.pn15.in, 24 ; <i32> [#uses=1]
+ %.pn14 = and i32 %.pn14.in, 511 ; <i32> [#uses=1]
+ %.pn13.in = or i32 %.pn13.in.in, 512 ; <i32> [#uses=1]
+ %.pn11.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn15; <i32*> [#uses=1]
+ %.pn12.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1]
+ %.pn13 = and i32 %.pn13.in, 767 ; <i32> [#uses=1]
+ %.pn10.in = or i32 %.pn10.in.in, 768 ; <i32> [#uses=1]
+ %.pn11 = load i32* %.pn11.in ; <i32> [#uses=1]
+ %.pn12 = load i32* %.pn12.in ; <i32> [#uses=1]
+ %.pn9.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1]
+ %.pn10 = and i32 %.pn10.in, 1023 ; <i32> [#uses=1]
+ %.pn8 = add i32 %.pn12, %.pn11 ; <i32> [#uses=1]
+ %.pn9 = load i32* %.pn9.in ; <i32> [#uses=1]
+ %.pn7.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1]
+ %.pn6 = xor i32 %.pn8, %.pn9 ; <i32> [#uses=1]
+ %.pn7 = load i32* %.pn7.in ; <i32> [#uses=1]
+ %.pn4 = load i32* %.pn4.in ; <i32> [#uses=1]
+ %.pn2 = load i32* %.pn2.in ; <i32> [#uses=1]
+ %.pn = add i32 %.pn6, %.pn7 ; <i32> [#uses=1]
+ %r.0 = xor i32 %.pn2, %.pn3 ; <i32> [#uses=1]
+ %.pn1 = xor i32 %.pn, %.pn5 ; <i32> [#uses=1]
+ %l.0 = xor i32 %.pn1, %.pn4 ; <i32> [#uses=1]
+ store i32 %l.0, i32* undef, align 4
+ store i32 %r.0, i32* %data, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
new file mode 100644
index 0000000..b6e67b1
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+
+ %struct.vorbis_comment = type { i8**, i32*, i32, i8* }
+@.str16 = external constant [2 x i8], align 1 ; <[2 x i8]*> [#uses=1]
+
+declare arm_apcscc i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+
+declare arm_apcscc i8* @__strcat_chk(i8*, i8*, i32) nounwind
+
+define arm_apcscc i8* @vorbis_comment_query(%struct.vorbis_comment* nocapture %vc, i8* %tag, i32 %count) nounwind {
+entry:
+ %0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=2]
+ %1 = call arm_apcscc i8* @__strcpy_chk(i8* %0, i8* %tag, i32 -1) nounwind; <i8*> [#uses=0]
+ %2 = call arm_apcscc i8* @__strcat_chk(i8* %0, i8* getelementptr ([2 x i8]* @.str16, i32 0, i32 0), i32 -1) nounwind; <i8*> [#uses=0]
+ %3 = getelementptr %struct.vorbis_comment* %vc, i32 0, i32 0; <i8***> [#uses=1]
+ br label %bb11
+
+bb6: ; preds = %bb11
+ %4 = load i8*** %3, align 4 ; <i8**> [#uses=1]
+ %scevgep = getelementptr i8** %4, i32 %8 ; <i8**> [#uses=1]
+ %5 = load i8** %scevgep, align 4 ; <i8*> [#uses=1]
+ br label %bb3.i
+
+bb3.i: ; preds = %bb3.i, %bb6
+ %scevgep7.i = getelementptr i8* %5, i32 0 ; <i8*> [#uses=1]
+ %6 = load i8* %scevgep7.i, align 1 ; <i8> [#uses=0]
+ br i1 undef, label %bb3.i, label %bb10
+
+bb10: ; preds = %bb3.i
+ %7 = add i32 %8, 1 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb10, %entry
+ %8 = phi i32 [ %7, %bb10 ], [ 0, %entry ] ; <i32> [#uses=3]
+ %9 = icmp sgt i32 undef, %8 ; <i1> [#uses=1]
+ br i1 %9, label %bb6, label %bb13
+
+bb13: ; preds = %bb11
+ ret i8* null
+}
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
new file mode 100644
index 0000000..1627f61
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 | FileCheck %s
+; rdar://7157006
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+%struct.asl_file_t = type { i32, i32, i32, %struct.file_string_t*, i64, i64, i64, i64, i64, i64, i32, %struct.FILE*, i8*, i8* }
+%struct.file_string_t = type { i64, i32, %struct.file_string_t*, [0 x i8] }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.asl_file_t*, i64, i64*)* @t to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize {
+; CHECK: t:
+; CHECK: adds r4, #8
+entry:
+ %val = alloca i64, align 4 ; <i64*> [#uses=3]
+ %0 = icmp eq %struct.asl_file_t* %s, null ; <i1> [#uses=1]
+ br i1 %0, label %bb13, label %bb1
+
+bb1: ; preds = %entry
+ %1 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2]
+ %2 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=2]
+ %3 = icmp eq %struct.FILE* %2, null ; <i1> [#uses=1]
+ br i1 %3, label %bb13, label %bb3
+
+bb3: ; preds = %bb1
+ %4 = add nsw i64 %off, 8 ; <i64> [#uses=1]
+ %5 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1]
+ %6 = load i32* %5, align 4 ; <i32> [#uses=1]
+ %7 = zext i32 %6 to i64 ; <i64> [#uses=1]
+ %8 = icmp sgt i64 %4, %7 ; <i1> [#uses=1]
+ br i1 %8, label %bb13, label %bb5
+
+bb5: ; preds = %bb3
+ %9 = call arm_apcscc i32 @fseeko(%struct.FILE* %2, i64 %off, i32 0) nounwind ; <i32> [#uses=1]
+ %10 = icmp eq i32 %9, 0 ; <i1> [#uses=1]
+ br i1 %10, label %bb7, label %bb13
+
+bb7: ; preds = %bb5
+ store i64 0, i64* %val, align 4
+ %11 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=1]
+ %val8 = bitcast i64* %val to i8* ; <i8*> [#uses=1]
+ %12 = call arm_apcscc i32 @fread(i8* noalias %val8, i32 8, i32 1, %struct.FILE* noalias %11) nounwind ; <i32> [#uses=1]
+ %13 = icmp eq i32 %12, 1 ; <i1> [#uses=1]
+ br i1 %13, label %bb10, label %bb13
+
+bb10: ; preds = %bb7
+ %14 = icmp eq i64* %out, null ; <i1> [#uses=1]
+ br i1 %14, label %bb13, label %bb11
+
+bb11: ; preds = %bb10
+ %15 = load i64* %val, align 4 ; <i64> [#uses=1]
+ %16 = call arm_apcscc i64 @asl_core_ntohq(i64 %15) nounwind ; <i64> [#uses=1]
+ store i64 %16, i64* %out, align 4
+ ret i32 0
+
+bb13: ; preds = %bb10, %bb7, %bb5, %bb3, %bb1, %entry
+ %.0 = phi i32 [ 2, %entry ], [ 2, %bb1 ], [ 7, %bb3 ], [ 7, %bb5 ], [ 7, %bb7 ], [ 0, %bb10 ] ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+declare arm_apcscc i32 @fseeko(%struct.FILE* nocapture, i64, i32) nounwind
+
+declare arm_apcscc i32 @fread(i8* noalias nocapture, i32, i32, %struct.FILE* noalias nocapture) nounwind
+
+declare arm_apcscc i64 @asl_core_ntohq(i64)
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
new file mode 100644
index 0000000..1e3c070
--- /dev/null
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -0,0 +1,288 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0}
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.adpcm_state = type { i16, i8 }
+@stepsizeTable = internal constant [89 x i32] [i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 17, i32 19, i32 21, i32 23, i32 25, i32 28, i32 31, i32 34, i32 37, i32 41, i32 45, i32 50, i32 55, i32 60, i32 66, i32 73, i32 80, i32 88, i32 97, i32 107, i32 118, i32 130, i32 143, i32 157, i32 173, i32 190, i32 209, i32 230, i32 253, i32 279, i32 307, i32 337, i32 371, i32 408, i32 449, i32 494, i32 544, i32 598, i32 658, i32 724, i32 796, i32 876, i32 963, i32 1060, i32 1166, i32 1282, i32 1411, i32 1552, i32 1707, i32 1878, i32 2066, i32 2272, i32 2499, i32 2749, i32 3024, i32 3327, i32 3660, i32 4026, i32 4428, i32 4871, i32 5358, i32 5894, i32 6484, i32 7132, i32 7845, i32 8630, i32 9493, i32 10442, i32 11487, i32 12635, i32 13899, i32 15289, i32 16818, i32 18500, i32 20350, i32 22385, i32 24623, i32 27086, i32 29794, i32 32767] ; <[89 x i32]*> [#uses=4]
+@indexTable = internal constant [16 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8, i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8] ; <[16 x i32]*> [#uses=2]
+@abuf = common global [500 x i8] zeroinitializer ; <[500 x i8]*> [#uses=1]
+@.str = private constant [11 x i8] c"input file\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[11 x i8]*> [#uses=1]
+@sbuf = common global [1000 x i16] zeroinitializer ; <[1000 x i16]*> [#uses=1]
+@state = common global %struct.adpcm_state zeroinitializer ; <%struct.adpcm_state*> [#uses=3]
+@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1]
+@.str1 = private constant [28 x i8] c"Final valprev=%d, index=%d\0A\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[28 x i8]*> [#uses=1]
+
+define arm_apcscc void @adpcm_coder(i16* nocapture %indata, i8* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
+entry:
+ %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
+ %1 = load i16* %0, align 2 ; <i16> [#uses=1]
+ %2 = sext i16 %1 to i32 ; <i32> [#uses=2]
+ %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
+ %4 = load i8* %3, align 2 ; <i8> [#uses=1]
+ %5 = sext i8 %4 to i32 ; <i32> [#uses=3]
+ %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb, label %bb27
+
+bb: ; preds = %bb25, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb25 ] ; <i32> [#uses=2]
+ %outp.136 = phi i8* [ %outdata, %entry ], [ %outp.0, %bb25 ] ; <i8*> [#uses=3]
+ %bufferstep.035 = phi i32 [ 1, %entry ], [ %tmp, %bb25 ] ; <i32> [#uses=3]
+ %outputbuffer.134 = phi i32 [ undef, %entry ], [ %outputbuffer.0, %bb25 ] ; <i32> [#uses=2]
+ %index.033 = phi i32 [ %5, %entry ], [ %index.2, %bb25 ] ; <i32> [#uses=1]
+ %valpred.132 = phi i32 [ %2, %entry ], [ %valpred.2, %bb25 ] ; <i32> [#uses=2]
+ %step.031 = phi i32 [ %7, %entry ], [ %36, %bb25 ] ; <i32> [#uses=5]
+ %inp.038 = getelementptr i16* %indata, i32 %indvar ; <i16*> [#uses=1]
+ %9 = load i16* %inp.038, align 2 ; <i16> [#uses=1]
+ %10 = sext i16 %9 to i32 ; <i32> [#uses=1]
+ %11 = sub i32 %10, %valpred.132 ; <i32> [#uses=3]
+ %12 = icmp slt i32 %11, 0 ; <i1> [#uses=1]
+ %iftmp.1.0 = select i1 %12, i32 8, i32 0 ; <i32> [#uses=2]
+ %13 = sub i32 0, %11 ; <i32> [#uses=1]
+ %14 = icmp eq i32 %iftmp.1.0, 0 ; <i1> [#uses=2]
+ %. = select i1 %14, i32 %11, i32 %13 ; <i32> [#uses=2]
+ %15 = ashr i32 %step.031, 3 ; <i32> [#uses=1]
+ %16 = icmp slt i32 %., %step.031 ; <i1> [#uses=2]
+ %delta.0 = select i1 %16, i32 0, i32 4 ; <i32> [#uses=2]
+ %17 = select i1 %16, i32 0, i32 %step.031 ; <i32> [#uses=2]
+ %diff.1 = sub i32 %., %17 ; <i32> [#uses=2]
+ %18 = ashr i32 %step.031, 1 ; <i32> [#uses=2]
+ %19 = icmp slt i32 %diff.1, %18 ; <i1> [#uses=2]
+ %20 = or i32 %delta.0, 2 ; <i32> [#uses=1]
+ %21 = select i1 %19, i32 %delta.0, i32 %20 ; <i32> [#uses=1]
+ %22 = select i1 %19, i32 0, i32 %18 ; <i32> [#uses=2]
+ %diff.2 = sub i32 %diff.1, %22 ; <i32> [#uses=1]
+ %23 = ashr i32 %step.031, 2 ; <i32> [#uses=2]
+ %24 = icmp slt i32 %diff.2, %23 ; <i1> [#uses=2]
+ %25 = zext i1 %24 to i32 ; <i32> [#uses=1]
+ %26 = select i1 %24, i32 0, i32 %23 ; <i32> [#uses=1]
+ %vpdiff.0 = add i32 %17, %15 ; <i32> [#uses=1]
+ %vpdiff.1 = add i32 %vpdiff.0, %22 ; <i32> [#uses=1]
+ %vpdiff.2 = add i32 %vpdiff.1, %26 ; <i32> [#uses=2]
+ %tmp30 = sub i32 0, %vpdiff.2 ; <i32> [#uses=1]
+ %valpred.0.p = select i1 %14, i32 %vpdiff.2, i32 %tmp30 ; <i32> [#uses=1]
+ %valpred.0 = add i32 %valpred.0.p, %valpred.132 ; <i32> [#uses=3]
+ %27 = icmp sgt i32 %valpred.0, 32767 ; <i1> [#uses=1]
+ br i1 %27, label %bb18, label %bb16
+
+bb16: ; preds = %bb
+ %28 = icmp slt i32 %valpred.0, -32768 ; <i1> [#uses=1]
+ br i1 %28, label %bb17, label %bb18
+
+bb17: ; preds = %bb16
+ br label %bb18
+
+bb18: ; preds = %bb17, %bb16, %bb
+ %valpred.2 = phi i32 [ -32768, %bb17 ], [ 32767, %bb ], [ %valpred.0, %bb16 ] ; <i32> [#uses=2]
+ %delta.1 = or i32 %21, %iftmp.1.0 ; <i32> [#uses=1]
+ %delta.2 = or i32 %delta.1, %25 ; <i32> [#uses=1]
+ %29 = xor i32 %delta.2, 1 ; <i32> [#uses=3]
+ %30 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %29 ; <i32*> [#uses=1]
+ %31 = load i32* %30, align 4 ; <i32> [#uses=1]
+ %32 = add i32 %31, %index.033 ; <i32> [#uses=2]
+ %33 = icmp slt i32 %32, 0 ; <i1> [#uses=1]
+ %index.1 = select i1 %33, i32 0, i32 %32 ; <i32> [#uses=2]
+ %34 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1]
+ %index.2 = select i1 %34, i32 88, i32 %index.1 ; <i32> [#uses=3]
+ %35 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
+ %36 = load i32* %35, align 4 ; <i32> [#uses=1]
+ %37 = icmp eq i32 %bufferstep.035, 0 ; <i1> [#uses=1]
+ br i1 %37, label %bb24, label %bb23
+
+bb23: ; preds = %bb18
+ %38 = shl i32 %29, 4 ; <i32> [#uses=1]
+ %39 = and i32 %38, 240 ; <i32> [#uses=1]
+ br label %bb25
+
+bb24: ; preds = %bb18
+ %40 = trunc i32 %29 to i8 ; <i8> [#uses=1]
+ %41 = and i8 %40, 15 ; <i8> [#uses=1]
+ %42 = trunc i32 %outputbuffer.134 to i8 ; <i8> [#uses=1]
+ %43 = or i8 %41, %42 ; <i8> [#uses=1]
+ store i8 %43, i8* %outp.136, align 1
+ %44 = getelementptr i8* %outp.136, i32 1 ; <i8*> [#uses=1]
+ br label %bb25
+
+bb25: ; preds = %bb24, %bb23
+ %outputbuffer.0 = phi i32 [ %39, %bb23 ], [ %outputbuffer.134, %bb24 ] ; <i32> [#uses=2]
+ %outp.0 = phi i8* [ %outp.136, %bb23 ], [ %44, %bb24 ] ; <i8*> [#uses=2]
+ %tmp = xor i32 %bufferstep.035, 1 ; <i32> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %len ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb26.bb27_crit_edge, label %bb
+
+bb26.bb27_crit_edge: ; preds = %bb25
+ %phitmp44 = icmp eq i32 %bufferstep.035, 1 ; <i1> [#uses=1]
+ br label %bb27
+
+bb27: ; preds = %bb26.bb27_crit_edge, %entry
+ %outp.1.lcssa = phi i8* [ %outp.0, %bb26.bb27_crit_edge ], [ %outdata, %entry ] ; <i8*> [#uses=1]
+ %bufferstep.0.lcssa = phi i1 [ %phitmp44, %bb26.bb27_crit_edge ], [ false, %entry ] ; <i1> [#uses=1]
+ %outputbuffer.1.lcssa = phi i32 [ %outputbuffer.0, %bb26.bb27_crit_edge ], [ undef, %entry ] ; <i32> [#uses=1]
+ %index.0.lcssa = phi i32 [ %index.2, %bb26.bb27_crit_edge ], [ %5, %entry ] ; <i32> [#uses=1]
+ %valpred.1.lcssa = phi i32 [ %valpred.2, %bb26.bb27_crit_edge ], [ %2, %entry ] ; <i32> [#uses=1]
+ br i1 %bufferstep.0.lcssa, label %bb28, label %bb29
+
+bb28: ; preds = %bb27
+ %45 = trunc i32 %outputbuffer.1.lcssa to i8 ; <i8> [#uses=1]
+ store i8 %45, i8* %outp.1.lcssa, align 1
+ br label %bb29
+
+bb29: ; preds = %bb28, %bb27
+ %46 = trunc i32 %valpred.1.lcssa to i16 ; <i16> [#uses=1]
+ store i16 %46, i16* %0, align 2
+ %47 = trunc i32 %index.0.lcssa to i8 ; <i8> [#uses=1]
+ store i8 %47, i8* %3, align 2
+ ret void
+}
+
+define arm_apcscc void @adpcm_decoder(i8* nocapture %indata, i16* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
+entry:
+ %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
+ %1 = load i16* %0, align 2 ; <i16> [#uses=1]
+ %2 = sext i16 %1 to i32 ; <i32> [#uses=2]
+ %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
+ %4 = load i8* %3, align 2 ; <i8> [#uses=1]
+ %5 = sext i8 %4 to i32 ; <i32> [#uses=3]
+ %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb, label %bb22
+
+bb: ; preds = %bb20, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb20 ] ; <i32> [#uses=2]
+ %inp.131 = phi i8* [ %indata, %entry ], [ %inp.0, %bb20 ] ; <i8*> [#uses=3]
+ %bufferstep.028 = phi i32 [ 0, %entry ], [ %tmp, %bb20 ] ; <i32> [#uses=2]
+ %inputbuffer.127 = phi i32 [ undef, %entry ], [ %inputbuffer.0, %bb20 ] ; <i32> [#uses=2]
+ %index.026 = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1]
+ %valpred.125 = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1]
+ %step.024 = phi i32 [ %7, %entry ], [ %35, %bb20 ] ; <i32> [#uses=4]
+ %outp.030 = getelementptr i16* %outdata, i32 %indvar ; <i16*> [#uses=1]
+ %9 = icmp eq i32 %bufferstep.028, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb2, label %bb3
+
+bb2: ; preds = %bb
+ %10 = load i8* %inp.131, align 1 ; <i8> [#uses=1]
+ %11 = sext i8 %10 to i32 ; <i32> [#uses=2]
+ %12 = getelementptr i8* %inp.131, i32 1 ; <i8*> [#uses=1]
+ %13 = ashr i32 %11, 4 ; <i32> [#uses=1]
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ %inputbuffer.0 = phi i32 [ %11, %bb2 ], [ %inputbuffer.127, %bb ] ; <i32> [#uses=1]
+ %delta.0.in = phi i32 [ %13, %bb2 ], [ %inputbuffer.127, %bb ] ; <i32> [#uses=5]
+ %inp.0 = phi i8* [ %12, %bb2 ], [ %inp.131, %bb ] ; <i8*> [#uses=1]
+ %delta.0 = and i32 %delta.0.in, 15 ; <i32> [#uses=1]
+ %tmp = xor i32 %bufferstep.028, 1 ; <i32> [#uses=1]
+ %14 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %delta.0 ; <i32*> [#uses=1]
+ %15 = load i32* %14, align 4 ; <i32> [#uses=1]
+ %16 = add i32 %15, %index.026 ; <i32> [#uses=2]
+ %17 = icmp slt i32 %16, 0 ; <i1> [#uses=1]
+ %index.1 = select i1 %17, i32 0, i32 %16 ; <i32> [#uses=2]
+ %18 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1]
+ %index.2 = select i1 %18, i32 88, i32 %index.1 ; <i32> [#uses=3]
+ %19 = and i32 %delta.0.in, 8 ; <i32> [#uses=1]
+ %20 = ashr i32 %step.024, 3 ; <i32> [#uses=1]
+ %21 = and i32 %delta.0.in, 4 ; <i32> [#uses=1]
+ %22 = icmp eq i32 %21, 0 ; <i1> [#uses=1]
+ %23 = select i1 %22, i32 0, i32 %step.024 ; <i32> [#uses=1]
+ %vpdiff.0 = add i32 %23, %20 ; <i32> [#uses=2]
+ %24 = and i32 %delta.0.in, 2 ; <i32> [#uses=1]
+ %25 = icmp eq i32 %24, 0 ; <i1> [#uses=1]
+ br i1 %25, label %bb11, label %bb10
+
+bb10: ; preds = %bb3
+ %26 = ashr i32 %step.024, 1 ; <i32> [#uses=1]
+ %27 = add i32 %vpdiff.0, %26 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb3
+ %vpdiff.1 = phi i32 [ %27, %bb10 ], [ %vpdiff.0, %bb3 ] ; <i32> [#uses=2]
+ %28 = and i32 %delta.0.in, 1 ; <i32> [#uses=1]
+ %toBool = icmp eq i32 %28, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %bb13, label %bb12
+
+bb12: ; preds = %bb11
+ %29 = ashr i32 %step.024, 2 ; <i32> [#uses=1]
+ %30 = add i32 %vpdiff.1, %29 ; <i32> [#uses=1]
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb11
+ %vpdiff.2 = phi i32 [ %30, %bb12 ], [ %vpdiff.1, %bb11 ] ; <i32> [#uses=2]
+ %31 = icmp eq i32 %19, 0 ; <i1> [#uses=1]
+ %tmp23 = sub i32 0, %vpdiff.2 ; <i32> [#uses=1]
+ %valpred.0.p = select i1 %31, i32 %vpdiff.2, i32 %tmp23 ; <i32> [#uses=1]
+ %valpred.0 = add i32 %valpred.0.p, %valpred.125 ; <i32> [#uses=3]
+ %32 = icmp sgt i32 %valpred.0, 32767 ; <i1> [#uses=1]
+ br i1 %32, label %bb20, label %bb18
+
+bb18: ; preds = %bb13
+ %33 = icmp slt i32 %valpred.0, -32768 ; <i1> [#uses=1]
+ br i1 %33, label %bb19, label %bb20
+
+bb19: ; preds = %bb18
+ br label %bb20
+
+bb20: ; preds = %bb19, %bb18, %bb13
+ %valpred.2 = phi i32 [ -32768, %bb19 ], [ 32767, %bb13 ], [ %valpred.0, %bb18 ] ; <i32> [#uses=3]
+ %34 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
+ %35 = load i32* %34, align 4 ; <i32> [#uses=1]
+ %36 = trunc i32 %valpred.2 to i16 ; <i16> [#uses=1]
+ store i16 %36, i16* %outp.030, align 2
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %len ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb22, label %bb
+
+bb22: ; preds = %bb20, %entry
+ %index.0.lcssa = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1]
+ %valpred.1.lcssa = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1]
+ %37 = trunc i32 %valpred.1.lcssa to i16 ; <i16> [#uses=1]
+ store i16 %37, i16* %0, align 2
+ %38 = trunc i32 %index.0.lcssa to i8 ; <i8> [#uses=1]
+ store i8 %38, i8* %3, align 2
+ ret void
+}
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb3, %entry
+ %0 = tail call arm_apcscc i32 (...)* @read(i32 0, i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4]
+ %1 = icmp slt i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ tail call arm_apcscc void @perror(i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0)) nounwind
+ ret i32 1
+
+bb2: ; preds = %bb
+ %2 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb4, label %bb3
+
+bb3: ; preds = %bb2
+ %3 = shl i32 %0, 1 ; <i32> [#uses=1]
+ tail call arm_apcscc void @adpcm_decoder(i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind
+ %4 = shl i32 %0, 2 ; <i32> [#uses=1]
+ %5 = tail call arm_apcscc i32 (...)* @write(i32 1, i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+ br label %bb
+
+bb4: ; preds = %bb2
+ %6 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
+ %7 = load i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4 ; <i16> [#uses=1]
+ %8 = sext i16 %7 to i32 ; <i32> [#uses=1]
+ %9 = load i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2 ; <i8> [#uses=1]
+ %10 = sext i8 %9 to i32 ; <i32> [#uses=1]
+ %11 = tail call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare arm_apcscc i32 @read(...)
+
+declare arm_apcscc void @perror(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @write(...)
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index cd76250..acfdc91 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb | not grep {ldr sp}
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin | \
+; RUN: llc < %s -march=thumb | not grep {ldr sp}
+; RUN: llc < %s -mtriple=thumb-apple-darwin | \
; RUN: not grep {sub.*r7}
-; RUN: llvm-as < %s | llc -march=thumb | grep 4294967280
+; RUN: llc < %s -march=thumb | grep 4294967280
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/Thumb/fpconv.ll b/test/CodeGen/Thumb/fpconv.ll
index 2003131..7da36dd 100644
--- a/test/CodeGen/Thumb/fpconv.ll
+++ b/test/CodeGen/Thumb/fpconv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
define float @f1(double %x) {
entry:
diff --git a/test/CodeGen/Thumb/fpow.ll b/test/CodeGen/Thumb/fpow.ll
index e5b92ad..be3dc0b 100644
--- a/test/CodeGen/Thumb/fpow.ll
+++ b/test/CodeGen/Thumb/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
define double @t(double %x, double %y) nounwind optsize {
entry:
diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll
index 270e331..0cac755 100644
--- a/test/CodeGen/Thumb/frame_thumb.ll
+++ b/test/CodeGen/Thumb/frame_thumb.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin \
+; RUN: llc < %s -mtriple=thumb-apple-darwin \
; RUN: -disable-fp-elim | not grep {r11}
-; RUN: llvm-as < %s | llc -mtriple=thumb-linux-gnueabi \
+; RUN: llc < %s -mtriple=thumb-linux-gnueabi \
; RUN: -disable-fp-elim | not grep {r11}
define i32 @f() {
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index 13084f6..d7cdcd8 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -stats |& \
+; RUN: llc < %s -march=thumb -stats |& \
; RUN: grep {4 .*Number of machine instrs printed}
;; Integer absolute value, should produce something as good as:
diff --git a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
index 2c872e7..5c8a52a 100644
--- a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
; Test Thumb-mode "I" constraint, for ADD immediate.
define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/Thumb/ispositive.ll b/test/CodeGen/Thumb/ispositive.ll
index 91f5970..eac3ef2 100644
--- a/test/CodeGen/Thumb/ispositive.ll
+++ b/test/CodeGen/Thumb/ispositive.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep {lsr r0, r0, #31}
+; RUN: llc < %s -march=thumb | FileCheck %s
define i32 @test1(i32 %X) {
entry:
+; CHECK: test1:
+; CHECK: lsrs r0, r0, #31
icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
zext i1 %0 to i32 ; <i32>:1 [#uses=1]
ret i32 %1
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index f7c9ed0..02de36a 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep {ldr.*LCP} | count 5
+; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
define void @test1() {
%tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll
index 4b2a7b2..9a28124 100644
--- a/test/CodeGen/Thumb/ldr_ext.ll
+++ b/test/CodeGen/Thumb/ldr_ext.ll
@@ -1,27 +1,56 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s -check-prefix=V6
-define i32 @test1(i8* %v.pntr.s0.u1) {
- %tmp.u = load i8* %v.pntr.s0.u1
+; rdar://7176514
+
+define i32 @test1(i8* %t1) nounwind {
+; V5: ldrb
+
+; V6: ldrb
+ %tmp.u = load i8* %t1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
-define i32 @test2(i16* %v.pntr.s0.u1) {
- %tmp.u = load i16* %v.pntr.s0.u1
+define i32 @test2(i16* %t1) nounwind {
+; V5: ldrh
+
+; V6: ldrh
+ %tmp.u = load i16* %t1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
-define i32 @test3(i8* %v.pntr.s1.u0) {
- %tmp.s = load i8* %v.pntr.s1.u0
+define i32 @test3(i8* %t0) nounwind {
+; V5: ldrb
+; V5: lsls
+; V5: asrs
+
+; V6: ldrb
+; V6: sxtb
+ %tmp.s = load i8* %t0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
-define i32 @test4() {
+define i32 @test4(i16* %t0) nounwind {
+; V5: ldrh
+; V5: lsls
+; V5: asrs
+
+; V6: ldrh
+; V6: sxth
+ %tmp.s = load i16* %t0
+ %tmp1.s = sext i16 %tmp.s to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; V5: movs r0, #0
+; V5: ldrsh
+
+; V6: movs r0, #0
+; V6: ldrsh
%tmp.s = load i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
index 0043fb5..81782cd 100644
--- a/test/CodeGen/Thumb/ldr_frame.ll
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep cpy | count 2
+; RUN: llc < %s -march=thumb | FileCheck %s
define i32 @f1() {
+; CHECK: f1:
+; CHECK: ldr r0
%buf = alloca [32 x i32], align 4
%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
%tmp1 = load i32* %tmp
@@ -8,6 +10,9 @@ define i32 @f1() {
}
define i32 @f2() {
+; CHECK: f2:
+; CHECK: mov r0
+; CHECK: ldrb
%buf = alloca [32 x i8], align 4
%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
%tmp1 = load i8* %tmp
@@ -16,6 +21,8 @@ define i32 @f2() {
}
define i32 @f3() {
+; CHECK: f3:
+; CHECK: ldr r0
%buf = alloca [32 x i32], align 4
%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
%tmp1 = load i32* %tmp
@@ -23,6 +30,9 @@ define i32 @f3() {
}
define i32 @f4() {
+; CHECK: f4:
+; CHECK: mov r0
+; CHECK: ldrb
%buf = alloca [32 x i8], align 4
%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
%tmp1 = load i8* %tmp
diff --git a/test/CodeGen/Thumb/long-setcc.ll b/test/CodeGen/Thumb/long-setcc.ll
index df6d137..8f2d98f 100644
--- a/test/CodeGen/Thumb/long-setcc.ll
+++ b/test/CodeGen/Thumb/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep cmp | count 1
+; RUN: llc < %s -march=thumb | grep cmp | count 1
define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index 2287443..e3ef44a 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
; RUN: grep mvn | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
; RUN: grep adc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
; RUN: grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __muldi3
+; RUN: llc < %s -march=thumb | grep __muldi3
define i64 @f1() {
entry:
diff --git a/test/CodeGen/Thumb/long_shift.ll b/test/CodeGen/Thumb/long_shift.ll
new file mode 100644
index 0000000..2431714
--- /dev/null
+++ b/test/CodeGen/Thumb/long_shift.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=thumb
+
+define i64 @f0(i64 %A, i64 %B) {
+ %tmp = bitcast i64 %A to i64
+ %tmp2 = lshr i64 %B, 1
+ %tmp3 = sub i64 %tmp, %tmp2
+ ret i64 %tmp3
+}
+
+define i32 @f1(i64 %x, i64 %y) {
+ %a = shl i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) {
+ %a = ashr i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) {
+ %a = lshr i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/Thumb/mul.ll b/test/CodeGen/Thumb/mul.ll
new file mode 100644
index 0000000..c1a2fb2
--- /dev/null
+++ b/test/CodeGen/Thumb/mul.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=thumb | grep mul | count 3
+; RUN: llc < %s -march=thumb | grep lsl | count 1
+
+define i32 @f1(i32 %u) {
+ %tmp = mul i32 %u, %u
+ ret i32 %tmp
+}
+
+define i32 @f2(i32 %u, i32 %v) {
+ %tmp = mul i32 %u, %v
+ ret i32 %tmp
+}
+
+define i32 @f3(i32 %u) {
+ %tmp = mul i32 %u, 5
+ ret i32 %tmp
+}
+
+define i32 @f4(i32 %u) {
+ %tmp = mul i32 %u, 4
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
new file mode 100644
index 0000000..c5e86ad
--- /dev/null
+++ b/test/CodeGen/Thumb/pop.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; rdar://7268481
+
+define arm_apcscc void @t(i8* %a, ...) nounwind {
+; CHECK: t:
+; CHECK: pop {r3}
+; CHECK-NEXT: add sp, #3 * 4
+; CHECK-NEXT: bx r3
+entry:
+ %a.addr = alloca i8*
+ store i8* %a, i8** %a.addr
+ ret void
+}
diff --git a/test/CodeGen/Thumb/push.ll b/test/CodeGen/Thumb/push.ll
new file mode 100644
index 0000000..63773c4
--- /dev/null
+++ b/test/CodeGen/Thumb/push.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-fp-elim | FileCheck %s
+; rdar://7268481
+
+define arm_apcscc void @t() nounwind {
+; CHECK: t:
+; CHECK-NEXT: push {r7}
+entry:
+ call void asm sideeffect ".long 0xe7ffdefe", ""() nounwind
+ ret void
+}
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index ae75549..7a183b0 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep blt | count 3
-; RUN: llvm-as < %s | llc -march=thumb | grep ble | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bls | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bhi | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __ltdf2
+; RUN: llc < %s -march=thumb | grep beq | count 1
+; RUN: llc < %s -march=thumb | grep bgt | count 1
+; RUN: llc < %s -march=thumb | grep blt | count 3
+; RUN: llc < %s -march=thumb | grep ble | count 1
+; RUN: llc < %s -march=thumb | grep bls | count 1
+; RUN: llc < %s -march=thumb | grep bhi | count 1
+; RUN: llc < %s -march=thumb | grep __ltdf2
define i32 @f1(i32 %a.s) {
entry:
diff --git a/test/CodeGen/Thumb/stack-frame.ll b/test/CodeGen/Thumb/stack-frame.ll
index 756d257..b103b33 100644
--- a/test/CodeGen/Thumb/stack-frame.ll
+++ b/test/CodeGen/Thumb/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -march=thumb | grep add | count 1
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -march=thumb | grep add | count 1
define void @f1() {
%c = alloca i8, align 1
diff --git a/test/CodeGen/Thumb/thumb-imm.ll b/test/CodeGen/Thumb/thumb-imm.ll
index 2be393a..74a57ff 100644
--- a/test/CodeGen/Thumb/thumb-imm.ll
+++ b/test/CodeGen/Thumb/thumb-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | not grep CPI
+; RUN: llc < %s -march=thumb | not grep CPI
define i32 @test1() {
diff --git a/test/CodeGen/Thumb/tst_teq.ll b/test/CodeGen/Thumb/tst_teq.ll
index 0456951..21ada3e 100644
--- a/test/CodeGen/Thumb/tst_teq.ll
+++ b/test/CodeGen/Thumb/tst_teq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep tst
+; RUN: llc < %s -march=thumb | grep tst
define i32 @f(i32 %a) {
entry:
diff --git a/test/CodeGen/Thumb/unord.ll b/test/CodeGen/Thumb/unord.ll
index 4202d26..39458ae 100644
--- a/test/CodeGen/Thumb/unord.ll
+++ b/test/CodeGen/Thumb/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep bne | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
+; RUN: llc < %s -march=thumb | grep bne | count 1
+; RUN: llc < %s -march=thumb | grep beq | count 1
define i32 @f1(float %X, float %Y) {
%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index a18010f..16a9c44 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=thumb-linux | grep pop | count 1
-; RUN: llvm-as < %s | llc -mtriple=thumb-darwin | grep pop | count 2
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1
+; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2
@str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
new file mode 100644
index 0000000..8f2283f
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv6t2-elf"
+ %struct.dwarf_cie = type <{ i32, i32, i8, [0 x i8], [3 x i8] }>
+
+declare arm_apcscc i8* @read_sleb128(i8*, i32* nocapture) nounwind
+
+define arm_apcscc i32 @get_cie_encoding(%struct.dwarf_cie* %cie) nounwind {
+entry:
+ br i1 undef, label %bb1, label %bb13
+
+bb1: ; preds = %entry
+ %tmp38 = add i32 undef, 10 ; <i32> [#uses=1]
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %bb1
+ %indvar.i = phi i32 [ 0, %bb1 ], [ %2, %bb.i ] ; <i32> [#uses=3]
+ %tmp39 = add i32 %indvar.i, %tmp38 ; <i32> [#uses=1]
+ %p_addr.0.i = getelementptr i8* undef, i32 %tmp39 ; <i8*> [#uses=1]
+ %0 = load i8* %p_addr.0.i, align 1 ; <i8> [#uses=1]
+ %1 = icmp slt i8 %0, 0 ; <i1> [#uses=1]
+ %2 = add i32 %indvar.i, 1 ; <i32> [#uses=1]
+ br i1 %1, label %bb.i, label %read_uleb128.exit
+
+read_uleb128.exit: ; preds = %bb.i
+ %.sum40 = add i32 %indvar.i, undef ; <i32> [#uses=1]
+ %.sum31 = add i32 %.sum40, 2 ; <i32> [#uses=1]
+ %scevgep.i = getelementptr %struct.dwarf_cie* %cie, i32 0, i32 3, i32 %.sum31 ; <i8*> [#uses=1]
+ %3 = call arm_apcscc i8* @read_sleb128(i8* %scevgep.i, i32* undef) ; <i8*> [#uses=0]
+ unreachable
+
+bb13: ; preds = %entry
+ ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
new file mode 100644
index 0000000..ec649c3
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; rdar://7076238
+
+@"\01LC" = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=1]
+
+define arm_apcscc i32 @t(i32, ...) nounwind {
+entry:
+; CHECK: t:
+; CHECK: add r7, sp, #3 * 4
+ %1 = load i8** undef, align 4 ; <i8*> [#uses=3]
+ %2 = getelementptr i8* %1, i32 4 ; <i8*> [#uses=1]
+ %3 = getelementptr i8* %1, i32 8 ; <i8*> [#uses=1]
+ %4 = bitcast i8* %2 to i32* ; <i32*> [#uses=1]
+ %5 = load i32* %4, align 4 ; <i32> [#uses=1]
+ %6 = trunc i32 %5 to i8 ; <i8> [#uses=1]
+ %7 = getelementptr i8* %1, i32 12 ; <i8*> [#uses=1]
+ %8 = bitcast i8* %3 to i32* ; <i32*> [#uses=1]
+ %9 = load i32* %8, align 4 ; <i32> [#uses=1]
+ %10 = trunc i32 %9 to i16 ; <i16> [#uses=1]
+ %11 = bitcast i8* %7 to i32* ; <i32*> [#uses=1]
+ %12 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %13 = trunc i32 %12 to i16 ; <i16> [#uses=1]
+ %14 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %15 = sext i8 %6 to i32 ; <i32> [#uses=2]
+ %16 = sext i16 %10 to i32 ; <i32> [#uses=2]
+ %17 = sext i16 %13 to i32 ; <i32> [#uses=2]
+ %18 = call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0]
+ %19 = add i32 0, %15 ; <i32> [#uses=1]
+ %20 = add i32 %19, %16 ; <i32> [#uses=1]
+ %21 = add i32 %20, %14 ; <i32> [#uses=1]
+ %22 = add i32 %21, %17 ; <i32> [#uses=1]
+ %23 = add i32 %22, 0 ; <i32> [#uses=1]
+ ret i32 %23
+}
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll b/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
new file mode 100644
index 0000000..4d21f9b
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2
+; rdar://7083961
+
+define arm_apcscc i32 @value(i64 %b1, i64 %b2) nounwind readonly {
+entry:
+ %0 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ %mod.0.ph.ph = select i1 %0, float -1.000000e+00, float 1.000000e+00 ; <float> [#uses=1]
+ br label %bb7
+
+bb7: ; preds = %bb7, %entry
+ br i1 undef, label %bb86.preheader, label %bb7
+
+bb86.preheader: ; preds = %bb7
+ %1 = fmul float %mod.0.ph.ph, 5.000000e+00 ; <float> [#uses=0]
+ br label %bb79
+
+bb79: ; preds = %bb79, %bb86.preheader
+ br i1 undef, label %bb119, label %bb79
+
+bb119: ; preds = %bb79
+ ret i32 undef
+}
diff --git a/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
new file mode 100644
index 0000000..f74d12e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
+ %struct.JQUANT_TBL = type { [64 x i16], i32 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.anon = type { [8 x i32], [48 x i8] }
+ %struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
+ %struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
+ %struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
+ %struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
+ %struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
+ %struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
+ %struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
+ %struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
+ %struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
+ %struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
+ %struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+ %struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
+ %struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
+ %struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
+ %struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
+ %struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
+ %struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+ %struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
+ %struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
+
+define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
+entry:
+ %workspace = alloca [64 x float], align 4 ; <[64 x float]*> [#uses=11]
+ %0 = load i8** undef, align 4 ; <i8*> [#uses=5]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=11]
+ %tmp39 = add i32 %indvar, 8 ; <i32> [#uses=0]
+ %tmp41 = add i32 %indvar, 16 ; <i32> [#uses=2]
+ %scevgep42 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp41 ; <float*> [#uses=1]
+ %tmp43 = add i32 %indvar, 24 ; <i32> [#uses=1]
+ %scevgep44 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp43 ; <float*> [#uses=1]
+ %tmp45 = add i32 %indvar, 32 ; <i32> [#uses=1]
+ %scevgep46 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp45 ; <float*> [#uses=1]
+ %tmp47 = add i32 %indvar, 40 ; <i32> [#uses=1]
+ %scevgep48 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp47 ; <float*> [#uses=1]
+ %tmp49 = add i32 %indvar, 48 ; <i32> [#uses=1]
+ %scevgep50 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp49 ; <float*> [#uses=1]
+ %tmp51 = add i32 %indvar, 56 ; <i32> [#uses=1]
+ %scevgep52 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp51 ; <float*> [#uses=1]
+ %wsptr.119 = getelementptr [64 x float]* %workspace, i32 0, i32 %indvar ; <float*> [#uses=1]
+ %tmp54 = shl i32 %indvar, 2 ; <i32> [#uses=1]
+ %scevgep76 = getelementptr i8* undef, i32 %tmp54 ; <i8*> [#uses=1]
+ %quantptr.118 = bitcast i8* %scevgep76 to float* ; <float*> [#uses=1]
+ %scevgep79 = getelementptr i16* %coef_block, i32 %tmp41 ; <i16*> [#uses=0]
+ %inptr.117 = getelementptr i16* %coef_block, i32 %indvar ; <i16*> [#uses=1]
+ %1 = load i16* null, align 2 ; <i16> [#uses=1]
+ %2 = load i16* undef, align 2 ; <i16> [#uses=1]
+ %3 = load i16* %inptr.117, align 2 ; <i16> [#uses=1]
+ %4 = sitofp i16 %3 to float ; <float> [#uses=1]
+ %5 = load float* %quantptr.118, align 4 ; <float> [#uses=1]
+ %6 = fmul float %4, %5 ; <float> [#uses=1]
+ %7 = fsub float %6, undef ; <float> [#uses=2]
+ %8 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %9 = fsub float %8, 0.000000e+00 ; <float> [#uses=2]
+ %10 = fadd float undef, 0.000000e+00 ; <float> [#uses=2]
+ %11 = fadd float %7, %9 ; <float> [#uses=2]
+ %12 = fsub float %7, %9 ; <float> [#uses=2]
+ %13 = sitofp i16 %1 to float ; <float> [#uses=1]
+ %14 = fmul float %13, undef ; <float> [#uses=2]
+ %15 = sitofp i16 %2 to float ; <float> [#uses=1]
+ %16 = load float* undef, align 4 ; <float> [#uses=1]
+ %17 = fmul float %15, %16 ; <float> [#uses=1]
+ %18 = fadd float %14, undef ; <float> [#uses=2]
+ %19 = fsub float %14, undef ; <float> [#uses=2]
+ %20 = fadd float undef, %17 ; <float> [#uses=2]
+ %21 = fadd float %20, %18 ; <float> [#uses=3]
+ %22 = fsub float %20, %18 ; <float> [#uses=1]
+ %23 = fmul float %22, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %24 = fadd float %19, undef ; <float> [#uses=1]
+ %25 = fmul float %24, 0x3FFD906BC0000000 ; <float> [#uses=2]
+ %26 = fmul float undef, 0x3FF1517A80000000 ; <float> [#uses=1]
+ %27 = fsub float %26, %25 ; <float> [#uses=1]
+ %28 = fmul float %19, 0xC004E7AEA0000000 ; <float> [#uses=1]
+ %29 = fadd float %28, %25 ; <float> [#uses=1]
+ %30 = fsub float %29, %21 ; <float> [#uses=3]
+ %31 = fsub float %23, %30 ; <float> [#uses=3]
+ %32 = fadd float %27, %31 ; <float> [#uses=1]
+ %33 = fadd float %10, %21 ; <float> [#uses=1]
+ store float %33, float* %wsptr.119, align 4
+ %34 = fsub float %10, %21 ; <float> [#uses=1]
+ store float %34, float* %scevgep52, align 4
+ %35 = fadd float %11, %30 ; <float> [#uses=1]
+ store float %35, float* null, align 4
+ %36 = fsub float %11, %30 ; <float> [#uses=1]
+ store float %36, float* %scevgep50, align 4
+ %37 = fadd float %12, %31 ; <float> [#uses=1]
+ store float %37, float* %scevgep42, align 4
+ %38 = fsub float %12, %31 ; <float> [#uses=1]
+ store float %38, float* %scevgep48, align 4
+ %39 = fadd float undef, %32 ; <float> [#uses=1]
+ store float %39, float* %scevgep46, align 4
+ store float undef, float* %scevgep44, align 4
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb6, label %bb
+
+bb6: ; preds = %bb
+ %.sum10 = add i32 %output_col, 1 ; <i32> [#uses=1]
+ %.sum8 = add i32 %output_col, 6 ; <i32> [#uses=1]
+ %.sum6 = add i32 %output_col, 2 ; <i32> [#uses=1]
+ %.sum = add i32 %output_col, 3 ; <i32> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb8, %bb6
+ %ctr.116 = phi i32 [ 0, %bb6 ], [ %88, %bb8 ] ; <i32> [#uses=3]
+ %scevgep = getelementptr i8** %output_buf, i32 %ctr.116 ; <i8**> [#uses=1]
+ %tmp = shl i32 %ctr.116, 3 ; <i32> [#uses=5]
+ %tmp2392 = or i32 %tmp, 4 ; <i32> [#uses=1]
+ %scevgep24 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2392 ; <float*> [#uses=1]
+ %tmp2591 = or i32 %tmp, 2 ; <i32> [#uses=1]
+ %scevgep26 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2591 ; <float*> [#uses=1]
+ %tmp2790 = or i32 %tmp, 6 ; <i32> [#uses=1]
+ %scevgep28 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2790 ; <float*> [#uses=1]
+ %tmp3586 = or i32 %tmp, 7 ; <i32> [#uses=0]
+ %wsptr.215 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp ; <float*> [#uses=1]
+ %40 = load i8** %scevgep, align 4 ; <i8*> [#uses=4]
+ %41 = load float* %wsptr.215, align 4 ; <float> [#uses=1]
+ %42 = load float* %scevgep24, align 4 ; <float> [#uses=1]
+ %43 = fadd float %41, %42 ; <float> [#uses=1]
+ %44 = load float* %scevgep26, align 4 ; <float> [#uses=1]
+ %45 = load float* %scevgep28, align 4 ; <float> [#uses=1]
+ %46 = fadd float %44, %45 ; <float> [#uses=1]
+ %47 = fsub float %43, %46 ; <float> [#uses=2]
+ %48 = fsub float undef, 0.000000e+00 ; <float> [#uses=1]
+ %49 = fadd float 0.000000e+00, undef ; <float> [#uses=1]
+ %50 = fptosi float %49 to i32 ; <i32> [#uses=1]
+ %51 = add i32 %50, 4 ; <i32> [#uses=1]
+ %52 = lshr i32 %51, 3 ; <i32> [#uses=1]
+ %53 = and i32 %52, 1023 ; <i32> [#uses=1]
+ %.sum14 = add i32 %53, 128 ; <i32> [#uses=1]
+ %54 = getelementptr i8* %0, i32 %.sum14 ; <i8*> [#uses=1]
+ %55 = load i8* %54, align 1 ; <i8> [#uses=1]
+ store i8 %55, i8* null, align 1
+ %56 = getelementptr i8* %40, i32 %.sum10 ; <i8*> [#uses=1]
+ store i8 0, i8* %56, align 1
+ %57 = load i8* null, align 1 ; <i8> [#uses=1]
+ %58 = getelementptr i8* %40, i32 %.sum8 ; <i8*> [#uses=1]
+ store i8 %57, i8* %58, align 1
+ %59 = fadd float undef, %48 ; <float> [#uses=1]
+ %60 = fptosi float %59 to i32 ; <i32> [#uses=1]
+ %61 = add i32 %60, 4 ; <i32> [#uses=1]
+ %62 = lshr i32 %61, 3 ; <i32> [#uses=1]
+ %63 = and i32 %62, 1023 ; <i32> [#uses=1]
+ %.sum7 = add i32 %63, 128 ; <i32> [#uses=1]
+ %64 = getelementptr i8* %0, i32 %.sum7 ; <i8*> [#uses=1]
+ %65 = load i8* %64, align 1 ; <i8> [#uses=1]
+ %66 = getelementptr i8* %40, i32 %.sum6 ; <i8*> [#uses=1]
+ store i8 %65, i8* %66, align 1
+ %67 = fptosi float undef to i32 ; <i32> [#uses=1]
+ %68 = add i32 %67, 4 ; <i32> [#uses=1]
+ %69 = lshr i32 %68, 3 ; <i32> [#uses=1]
+ %70 = and i32 %69, 1023 ; <i32> [#uses=1]
+ %.sum5 = add i32 %70, 128 ; <i32> [#uses=1]
+ %71 = getelementptr i8* %0, i32 %.sum5 ; <i8*> [#uses=1]
+ %72 = load i8* %71, align 1 ; <i8> [#uses=1]
+ store i8 %72, i8* undef, align 1
+ %73 = fadd float %47, undef ; <float> [#uses=1]
+ %74 = fptosi float %73 to i32 ; <i32> [#uses=1]
+ %75 = add i32 %74, 4 ; <i32> [#uses=1]
+ %76 = lshr i32 %75, 3 ; <i32> [#uses=1]
+ %77 = and i32 %76, 1023 ; <i32> [#uses=1]
+ %.sum3 = add i32 %77, 128 ; <i32> [#uses=1]
+ %78 = getelementptr i8* %0, i32 %.sum3 ; <i8*> [#uses=1]
+ %79 = load i8* %78, align 1 ; <i8> [#uses=1]
+ store i8 %79, i8* undef, align 1
+ %80 = fsub float %47, undef ; <float> [#uses=1]
+ %81 = fptosi float %80 to i32 ; <i32> [#uses=1]
+ %82 = add i32 %81, 4 ; <i32> [#uses=1]
+ %83 = lshr i32 %82, 3 ; <i32> [#uses=1]
+ %84 = and i32 %83, 1023 ; <i32> [#uses=1]
+ %.sum1 = add i32 %84, 128 ; <i32> [#uses=1]
+ %85 = getelementptr i8* %0, i32 %.sum1 ; <i8*> [#uses=1]
+ %86 = load i8* %85, align 1 ; <i8> [#uses=1]
+ %87 = getelementptr i8* %40, i32 %.sum ; <i8*> [#uses=1]
+ store i8 %86, i8* %87, align 1
+ %88 = add i32 %ctr.116, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %88, 8 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb8
+
+return: ; preds = %bb8
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
new file mode 100644
index 0000000..a8e86d5
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+@csize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1]
+@vsize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1]
+@cll = external global [20 x [10 x i8]] ; <[20 x [10 x i8]]*> [#uses=1]
+@lefline = external global [100 x [20 x i32]] ; <[100 x [20 x i32]]*> [#uses=1]
+@sep = external global [20 x i32] ; <[20 x i32]*> [#uses=1]
+
+define arm_apcscc void @main(i32 %argc, i8** %argv) noreturn nounwind {
+; CHECK: main:
+; CHECK: ldrb
+entry:
+ %nb.i.i.i = alloca [25 x i8], align 1 ; <[25 x i8]*> [#uses=0]
+ %line.i.i.i = alloca [200 x i8], align 1 ; <[200 x i8]*> [#uses=1]
+ %line.i = alloca [1024 x i8], align 1 ; <[1024 x i8]*> [#uses=0]
+ br i1 undef, label %bb.i.i, label %bb4.preheader.i
+
+bb.i.i: ; preds = %entry
+ unreachable
+
+bb4.preheader.i: ; preds = %entry
+ br i1 undef, label %tbl.exit, label %bb.i.preheader
+
+bb.i.preheader: ; preds = %bb4.preheader.i
+ %line3.i.i.i = getelementptr [200 x i8]* %line.i.i.i, i32 0, i32 0 ; <i8*> [#uses=1]
+ br label %bb.i
+
+bb.i: ; preds = %bb4.backedge.i, %bb.i.preheader
+ br i1 undef, label %bb3.i, label %bb4.backedge.i
+
+bb3.i: ; preds = %bb.i
+ br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i
+
+bb.i183.i.i: ; preds = %bb.i183.i.i, %bb3.i
+ br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i
+
+bb2.i184.i.i: ; preds = %bb.i183.i.i, %bb3.i
+ br i1 undef, label %bb5.i185.i.i, label %bb35.preheader.i.i.i
+
+bb35.preheader.i.i.i: ; preds = %bb2.i184.i.i
+ %0 = load i8* %line3.i.i.i, align 1 ; <i8> [#uses=1]
+ %1 = icmp eq i8 %0, 59 ; <i1> [#uses=1]
+ br i1 %1, label %bb36.i.i.i, label %bb9.i186.i.i
+
+bb5.i185.i.i: ; preds = %bb2.i184.i.i
+ br label %bb.i171.i.i
+
+bb9.i186.i.i: ; preds = %bb35.preheader.i.i.i
+ unreachable
+
+bb36.i.i.i: ; preds = %bb35.preheader.i.i.i
+ br label %bb.i171.i.i
+
+bb.i171.i.i: ; preds = %bb3.i176.i.i, %bb36.i.i.i, %bb5.i185.i.i
+ %2 = phi i32 [ %4, %bb3.i176.i.i ], [ 0, %bb36.i.i.i ], [ 0, %bb5.i185.i.i ] ; <i32> [#uses=6]
+ %scevgep16.i.i.i = getelementptr [20 x i32]* @sep, i32 0, i32 %2 ; <i32*> [#uses=1]
+ %scevgep18.i.i.i = getelementptr [20 x [10 x i8]]* @cll, i32 0, i32 %2, i32 0 ; <i8*> [#uses=0]
+ store i32 -1, i32* %scevgep16.i.i.i, align 4
+ br label %bb1.i175.i.i
+
+bb1.i175.i.i: ; preds = %bb1.i175.i.i, %bb.i171.i.i
+ %i.03.i172.i.i = phi i32 [ 0, %bb.i171.i.i ], [ %3, %bb1.i175.i.i ] ; <i32> [#uses=4]
+ %scevgep11.i.i.i = getelementptr [100 x [20 x i32]]* @lefline, i32 0, i32 %i.03.i172.i.i, i32 %2 ; <i32*> [#uses=1]
+ %scevgep12.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @vsize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=1]
+ %scevgep13.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @csize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=0]
+ store i8 0, i8* %scevgep12.i.i.i, align 1
+ store i32 0, i32* %scevgep11.i.i.i, align 4
+ store i32 108, i32* undef, align 4
+ %3 = add i32 %i.03.i172.i.i, 1 ; <i32> [#uses=2]
+ %exitcond.i174.i.i = icmp eq i32 %3, 100 ; <i1> [#uses=1]
+ br i1 %exitcond.i174.i.i, label %bb3.i176.i.i, label %bb1.i175.i.i
+
+bb3.i176.i.i: ; preds = %bb1.i175.i.i
+ %4 = add i32 %2, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb5.i177.i.i, label %bb.i171.i.i
+
+bb5.i177.i.i: ; preds = %bb3.i176.i.i
+ unreachable
+
+bb4.backedge.i: ; preds = %bb.i
+ br i1 undef, label %tbl.exit, label %bb.i
+
+tbl.exit: ; preds = %bb4.backedge.i, %bb4.preheader.i
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
new file mode 100644
index 0000000..6cbfd0d
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
+
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 } ; type %0
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 } ; type %1
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 } ; type %2
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 } ; type %3
+ type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 } ; type %4
+ %"struct.std::CharVectorType" = type { %"struct.std::_Vector_base<char,std::allocator<char> >" }
+ %"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
+ %"struct.std::_Bit_iterator_base" = type { i32*, i32 }
+ %"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
+ %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i32* }
+ %"struct.std::_Vector_base<char,std::allocator<char> >" = type { %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" }
+ %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" = type { i8*, i8*, i8* }
+ %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" }
+ %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" = type { i16*, i16*, i16* }
+ %"struct.std::basic_ostream<char,std::char_traits<char> >.base" = type { i32 (...)** }
+ %"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" }
+ %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" }
+ %"struct.xalanc_1_8::FormatterListener" = type { %"struct.std::basic_ostream<char,std::char_traits<char> >.base", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32 }
+ %"struct.xalanc_1_8::FormatterToXML" = type { %"struct.xalanc_1_8::FormatterListener", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, %"struct.xalanc_1_8::XalanOutputStream"*, i16, [256 x i16], [256 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", i32, i32, %"struct.std::vector<bool,std::allocator<bool> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32, %"struct.std::CharVectorType", %"struct.std::vector<bool,std::allocator<bool> >", %0, %1, %2, %3, %0, %1, %2, %3, %4, i16*, i32 }
+ %"struct.xalanc_1_8::XalanDOMString" = type { %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32 }
+ %"struct.xalanc_1_8::XalanOutputStream" = type { i32 (...)**, i32, %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32, %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, %"struct.std::CharVectorType" }
+
+declare arm_apcscc void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"*)
+
+define arm_apcscc void @_ZN10xalanc_1_814FormatterToXML5cdataEPKtj(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length) {
+entry:
+ %0 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 13 ; <i8*> [#uses=1]
+ br i1 undef, label %bb4, label %bb
+
+bb: ; preds = %entry
+ store i8 0, i8* %0, align 1
+ %1 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
+ %2 = load i32 (...)*** %1, align 4 ; <i32 (...)**> [#uses=1]
+ %3 = getelementptr i32 (...)** %2, i32 11 ; <i32 (...)**> [#uses=1]
+ %4 = load i32 (...)** %3, align 4 ; <i32 (...)*> [#uses=1]
+ %5 = bitcast i32 (...)* %4 to void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)* ; <void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*> [#uses=1]
+ tail call arm_apcscc void %5(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length)
+ ret void
+
+bb4: ; preds = %entry
+ tail call arm_apcscc void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"* %this)
+ tail call arm_apcscc void undef(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 0, i32 %length, i8 zeroext undef)
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
new file mode 100644
index 0000000..ebe9d46
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi
+; PR4681
+
+ %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+ %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str2 = external constant [30 x i8], align 1 ; <[30 x i8]*> [#uses=1]
+
+define arm_aapcscc i32 @__mf_heuristic_check(i32 %ptr, i32 %ptr_high) nounwind {
+entry:
+ br i1 undef, label %bb1, label %bb
+
+bb: ; preds = %entry
+ unreachable
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb9, label %bb2
+
+bb2: ; preds = %bb1
+ %0 = call i8* @llvm.frameaddress(i32 0) ; <i8*> [#uses=1]
+ %1 = call arm_aapcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0]
+ unreachable
+
+bb9: ; preds = %bb1
+ ret i32 undef
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+declare arm_aapcscc i32 @fprintf(%struct.FILE* noalias nocapture, i8* noalias nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
new file mode 100644
index 0000000..319d29b
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim
+
+ type { %struct.GAP } ; type %0
+ type { i16, i8, i8 } ; type %1
+ type { [2 x i32], [2 x i32] } ; type %2
+ type { %struct.rec* } ; type %3
+ type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.FILE_POS = type { i8, i8, i16, i32 }
+ %struct.FIRST_UNION = type { %struct.FILE_POS }
+ %struct.FOURTH_UNION = type { %struct.STYLE }
+ %struct.GAP = type { i8, i8, i16 }
+ %struct.LIST = type { %struct.rec*, %struct.rec* }
+ %struct.SECOND_UNION = type { %1 }
+ %struct.STYLE = type { %0, %0, i16, i16, i32 }
+ %struct.THIRD_UNION = type { %2 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+ %struct.rec = type { %struct.head_type }
+@.str24239 = external constant [20 x i8], align 1 ; <[20 x i8]*> [#uses=1]
+@no_file_pos = external global %4 ; <%4*> [#uses=1]
+@zz_tmp = external global %struct.rec* ; <%struct.rec**> [#uses=1]
+@.str81872 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+@out_fp = external global %struct.FILE* ; <%struct.FILE**> [#uses=2]
+@cpexists = external global i32 ; <i32*> [#uses=2]
+@.str212784 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@.str1822946 = external constant [8 x i8], align 1 ; <[8 x i8]*> [#uses=1]
+@.str1842948 = external constant [11 x i8], align 1 ; <[11 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fwrite"(i8*, i32, i32, i8*)
+
+declare arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind
+
+declare arm_apcscc void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
+
+declare arm_apcscc i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+define arm_apcscc void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
+entry:
+ br label %bb5
+
+bb5: ; preds = %bb5, %entry
+ %.pn = phi %struct.rec* [ %y.0, %bb5 ], [ undef, %entry ] ; <%struct.rec*> [#uses=1]
+ %y.0.in = getelementptr %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ %y.0 = load %struct.rec** %y.0.in ; <%struct.rec*> [#uses=2]
+ br i1 undef, label %bb5, label %bb6
+
+bb6: ; preds = %bb5
+ %0 = call arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext 0, %struct.rec** undef, %struct.FILE_POS* null, i32* undef) nounwind ; <%struct.FILE*> [#uses=1]
+ br i1 false, label %bb.i, label %FontHalfXHeight.exit
+
+bb.i: ; preds = %bb6
+ br label %FontHalfXHeight.exit
+
+FontHalfXHeight.exit: ; preds = %bb.i, %bb6
+ br i1 undef, label %bb.i1, label %FontSize.exit
+
+bb.i1: ; preds = %FontHalfXHeight.exit
+ br label %FontSize.exit
+
+FontSize.exit: ; preds = %bb.i1, %FontHalfXHeight.exit
+ %1 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %2 = icmp ult i32 0, undef ; <i1> [#uses=1]
+ br i1 %2, label %bb.i5, label %FontName.exit
+
+bb.i5: ; preds = %FontSize.exit
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ br label %FontName.exit
+
+FontName.exit: ; preds = %bb.i5, %FontSize.exit
+ %3 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind ; <i32> [#uses=0]
+ %4 = call arm_apcscc i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
+ %5 = sub i32 %colmark, undef ; <i32> [#uses=1]
+ %6 = sub i32 %rowmark, undef ; <i32> [#uses=1]
+ %7 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %8 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ %9 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
+ %10 = load i32* %9, align 4 ; <i32> [#uses=1]
+ %11 = sub i32 0, %10 ; <i32> [#uses=1]
+ %12 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %13 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ br label %bb100.outer.outer
+
+bb100.outer.outer: ; preds = %bb79.critedge, %bb1.i3, %FontName.exit
+ %x_addr.0.ph.ph = phi %struct.rec* [ %x, %FontName.exit ], [ null, %bb79.critedge ], [ null, %bb1.i3 ] ; <%struct.rec*> [#uses=1]
+ %14 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
+ br label %bb100.outer
+
+bb.i80: ; preds = %bb3.i85
+ br i1 undef, label %bb2.i84, label %bb2.i51
+
+bb2.i84: ; preds = %bb100.outer, %bb.i80
+ br i1 undef, label %bb3.i77, label %bb3.i85
+
+bb3.i85: ; preds = %bb2.i84
+ br i1 false, label %StringBeginsWith.exit88, label %bb.i80
+
+StringBeginsWith.exit88: ; preds = %bb3.i85
+ br i1 undef, label %bb3.i77, label %bb2.i51
+
+bb2.i.i68: ; preds = %bb3.i77
+ br label %bb3.i77
+
+bb3.i77: ; preds = %bb2.i.i68, %StringBeginsWith.exit88, %bb2.i84
+ br i1 false, label %bb1.i58, label %bb2.i.i68
+
+bb1.i58: ; preds = %bb3.i77
+ unreachable
+
+bb.i47: ; preds = %bb3.i52
+ br i1 undef, label %bb2.i51, label %bb2.i.i15.critedge
+
+bb2.i51: ; preds = %bb.i47, %StringBeginsWith.exit88, %bb.i80
+ %15 = load i8* undef, align 1 ; <i8> [#uses=0]
+ br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
+
+bb3.i52: ; preds = %bb2.i51
+ br i1 false, label %StringBeginsWith.exit55, label %bb.i47
+
+StringBeginsWith.exit55thread-split: ; preds = %bb2.i51
+ br label %StringBeginsWith.exit55
+
+StringBeginsWith.exit55: ; preds = %StringBeginsWith.exit55thread-split, %bb3.i52
+ br label %bb2.i41
+
+bb2.i41: ; preds = %bb2.i41, %StringBeginsWith.exit55
+ br label %bb2.i41
+
+bb2.i.i15.critedge: ; preds = %bb.i47
+ %16 = call arm_apcscc i8* @fgets(i8* undef, i32 512, %struct.FILE* %0) nounwind ; <i8*> [#uses=0]
+ %iftmp.560.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
+ br label %bb100.outer
+
+bb2.i8: ; preds = %bb100.outer
+ br i1 undef, label %bb1.i3, label %bb79.critedge
+
+bb1.i3: ; preds = %bb2.i8
+ br label %bb100.outer.outer
+
+bb79.critedge: ; preds = %bb2.i8
+ store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+ br label %bb100.outer.outer
+
+bb100.outer: ; preds = %bb2.i.i15.critedge, %bb100.outer.outer
+ %state.0.ph = phi i32 [ 0, %bb100.outer.outer ], [ %iftmp.560.0, %bb2.i.i15.critedge ] ; <i32> [#uses=1]
+ %cond = icmp eq i32 %state.0.ph, 1 ; <i1> [#uses=1]
+ br i1 %cond, label %bb2.i8, label %bb2.i84
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
new file mode 100644
index 0000000..a62b612
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -0,0 +1,508 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim -O3
+
+ type { i16, i8, i8 } ; type %0
+ type { [2 x i32], [2 x i32] } ; type %1
+ type { %struct.GAP } ; type %2
+ type { %struct.rec* } ; type %3
+ type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
+ type { i8, i8, i8, i8 } ; type %5
+ %struct.COMPOSITE = type { i8, i16, i16 }
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.FILE_POS = type { i8, i8, i16, i32 }
+ %struct.FIRST_UNION = type { %struct.FILE_POS }
+ %struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* }
+ %struct.FOURTH_UNION = type { %struct.STYLE }
+ %struct.GAP = type { i8, i8, i16 }
+ %struct.LIST = type { %struct.rec*, %struct.rec* }
+ %struct.SECOND_UNION = type { %0 }
+ %struct.STYLE = type { %2, %2, i16, i16, i32 }
+ %struct.THIRD_UNION = type { %1 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+ %struct.metrics = type { i16, i16, i16, i16, i16 }
+ %struct.rec = type { %struct.head_type }
+@.str24239 = external constant [20 x i8], align 1 ; <[20 x i8]*> [#uses=1]
+@no_file_pos = external global %4 ; <%4*> [#uses=1]
+@.str19294 = external constant [9 x i8], align 1 ; <[9 x i8]*> [#uses=1]
+@zz_lengths = external global [150 x i8] ; <[150 x i8]*> [#uses=1]
+@next_free.4772 = external global i8** ; <i8***> [#uses=3]
+@top_free.4773 = external global i8** ; <i8***> [#uses=2]
+@.str1575 = external constant [32 x i8], align 1 ; <[32 x i8]*> [#uses=1]
+@zz_free = external global [524 x %struct.rec*] ; <[524 x %struct.rec*]*> [#uses=2]
+@zz_hold = external global %struct.rec* ; <%struct.rec**> [#uses=5]
+@zz_tmp = external global %struct.rec* ; <%struct.rec**> [#uses=2]
+@zz_res = external global %struct.rec* ; <%struct.rec**> [#uses=2]
+@xx_link = external global %struct.rec* ; <%struct.rec**> [#uses=2]
+@font_count = external global i32 ; <i32*> [#uses=1]
+@.str81872 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+@.str101874 = external constant [30 x i8], align 1 ; <[30 x i8]*> [#uses=1]
+@.str111875 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@.str141878 = external constant [27 x i8], align 1 ; <[27 x i8]*> [#uses=1]
+@out_fp = external global %struct.FILE* ; <%struct.FILE**> [#uses=3]
+@.str192782 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@cpexists = external global i32 ; <i32*> [#uses=2]
+@.str212784 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@currentfont = external global i32 ; <i32*> [#uses=3]
+@wordcount = external global i32 ; <i32*> [#uses=1]
+@needs = external global %struct.rec* ; <%struct.rec**> [#uses=1]
+@.str742838 = external constant [6 x i8], align 1 ; <[6 x i8]*> [#uses=1]
+@.str752839 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
+@.str1802944 = external constant [40 x i8], align 1 ; <[40 x i8]*> [#uses=1]
+@.str1822946 = external constant [8 x i8], align 1 ; <[8 x i8]*> [#uses=1]
+@.str1842948 = external constant [11 x i8], align 1 ; <[11 x i8]*> [#uses=1]
+@.str1852949 = external constant [23 x i8], align 1 ; <[23 x i8]*> [#uses=1]
+@.str1872951 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1]
+@.str1932957 = external constant [26 x i8], align 1 ; <[26 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fwrite"(i8*, i32, i32, i8*)
+
+declare arm_apcscc i32 @remove(i8* nocapture) nounwind
+
+declare arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind
+
+declare arm_apcscc %struct.rec* @MakeWord(i32, i8* nocapture, %struct.FILE_POS*) nounwind
+
+declare arm_apcscc void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fputs"(i8*, %struct.FILE*)
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare arm_apcscc i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+define arm_apcscc void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
+entry:
+ %buff = alloca [512 x i8], align 4 ; <[512 x i8]*> [#uses=5]
+ %0 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=2]
+ %1 = load i8* %0, align 4 ; <i8> [#uses=1]
+ %2 = add i8 %1, -94 ; <i8> [#uses=1]
+ %3 = icmp ugt i8 %2, 1 ; <i1> [#uses=1]
+ br i1 %3, label %bb, label %bb1
+
+bb: ; preds = %entry
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %4 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %5 = bitcast %struct.SECOND_UNION* %4 to %5* ; <%5*> [#uses=1]
+ %6 = getelementptr %5* %5, i32 0, i32 1 ; <i8*> [#uses=1]
+ %7 = load i8* %6, align 1 ; <i8> [#uses=1]
+ %8 = icmp eq i8 %7, 0 ; <i1> [#uses=1]
+ br i1 %8, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1
+ %9 = load %struct.rec** undef, align 4 ; <%struct.rec*> [#uses=0]
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb3
+ %y.0 = load %struct.rec** null ; <%struct.rec*> [#uses=2]
+ br i1 false, label %bb5, label %bb6
+
+bb6: ; preds = %bb5
+ %10 = load i8* %0, align 4 ; <i8> [#uses=1]
+ %11 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=1]
+ %12 = call arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext %10, %struct.rec** null, %struct.FILE_POS* %11, i32* undef) nounwind ; <%struct.FILE*> [#uses=4]
+ br i1 false, label %bb7, label %bb8
+
+bb7: ; preds = %bb6
+ unreachable
+
+bb8: ; preds = %bb6
+ %13 = and i32 undef, 4095 ; <i32> [#uses=2]
+ %14 = load i32* @currentfont, align 4 ; <i32> [#uses=0]
+ br i1 false, label %bb10, label %bb9
+
+bb9: ; preds = %bb8
+ %15 = icmp ult i32 0, %13 ; <i1> [#uses=1]
+ br i1 %15, label %bb.i, label %FontHalfXHeight.exit
+
+bb.i: ; preds = %bb9
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8]* @.str111875, i32 0, i32 0)) nounwind
+ %.pre186 = load i32* @currentfont, align 4 ; <i32> [#uses=1]
+ br label %FontHalfXHeight.exit
+
+FontHalfXHeight.exit: ; preds = %bb.i, %bb9
+ %16 = phi i32 [ %.pre186, %bb.i ], [ %13, %bb9 ] ; <i32> [#uses=1]
+ br i1 false, label %bb.i1, label %bb1.i
+
+bb.i1: ; preds = %FontHalfXHeight.exit
+ br label %bb1.i
+
+bb1.i: ; preds = %bb.i1, %FontHalfXHeight.exit
+ br i1 undef, label %bb2.i, label %FontSize.exit
+
+bb2.i: ; preds = %bb1.i
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 37, i32 61, i8* getelementptr ([30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
+ unreachable
+
+FontSize.exit: ; preds = %bb1.i
+ %17 = getelementptr %struct.FONT_INFO* undef, i32 %16, i32 5 ; <%struct.rec**> [#uses=0]
+ %18 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %19 = load i32* @currentfont, align 4 ; <i32> [#uses=2]
+ %20 = load i32* @font_count, align 4 ; <i32> [#uses=1]
+ %21 = icmp ult i32 %20, %19 ; <i1> [#uses=1]
+ br i1 %21, label %bb.i5, label %FontName.exit
+
+bb.i5: ; preds = %FontSize.exit
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ br label %FontName.exit
+
+FontName.exit: ; preds = %bb.i5, %FontSize.exit
+ %22 = phi %struct.FONT_INFO* [ undef, %bb.i5 ], [ undef, %FontSize.exit ] ; <%struct.FONT_INFO*> [#uses=1]
+ %23 = getelementptr %struct.FONT_INFO* %22, i32 %19, i32 5 ; <%struct.rec**> [#uses=0]
+ %24 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0]
+ br label %bb10
+
+bb10: ; preds = %FontName.exit, %bb8
+ %25 = call arm_apcscc i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
+ %26 = sub i32 %rowmark, undef ; <i32> [#uses=1]
+ %27 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %28 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ %29 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0]
+ %30 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ %31 = load i32* %30, align 4 ; <i32> [#uses=1]
+ %32 = sub i32 0, %31 ; <i32> [#uses=1]
+ %33 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %34 = sub i32 0, %33 ; <i32> [#uses=1]
+ %35 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %36 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @cpexists, align 4
+ %37 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ %38 = getelementptr %struct.rec* %37, i32 0, i32 0, i32 4 ; <%struct.FOURTH_UNION*> [#uses=1]
+ %39 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0]
+ %buff14 = getelementptr [512 x i8]* %buff, i32 0, i32 0 ; <i8*> [#uses=5]
+ %40 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
+ %iftmp.506.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
+ %41 = getelementptr [512 x i8]* %buff, i32 0, i32 26 ; <i8*> [#uses=1]
+ br label %bb100.outer.outer
+
+bb100.outer.outer: ; preds = %bb83, %bb10
+ %state.0.ph.ph = phi i32 [ %iftmp.506.0, %bb10 ], [ undef, %bb83 ] ; <i32> [#uses=1]
+ %x_addr.0.ph.ph = phi %struct.rec* [ %x, %bb10 ], [ %71, %bb83 ] ; <%struct.rec*> [#uses=1]
+ %42 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
+ br label %bb100.outer
+
+bb.i80: ; preds = %bb3.i85
+ %43 = icmp eq i8 %44, %46 ; <i1> [#uses=1]
+ %indvar.next.i79 = add i32 %indvar.i81, 1 ; <i32> [#uses=1]
+ br i1 %43, label %bb2.i84, label %bb2.i51
+
+bb2.i84: ; preds = %bb100.outer, %bb.i80
+ %indvar.i81 = phi i32 [ %indvar.next.i79, %bb.i80 ], [ 0, %bb100.outer ] ; <i32> [#uses=3]
+ %pp.0.i82 = getelementptr [27 x i8]* @.str141878, i32 0, i32 %indvar.i81 ; <i8*> [#uses=2]
+ %sp.0.i83 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i81 ; <i8*> [#uses=1]
+ %44 = load i8* %sp.0.i83, align 1 ; <i8> [#uses=2]
+ %45 = icmp eq i8 %44, 0 ; <i1> [#uses=1]
+ br i1 %45, label %StringBeginsWith.exit88thread-split, label %bb3.i85
+
+bb3.i85: ; preds = %bb2.i84
+ %46 = load i8* %pp.0.i82, align 1 ; <i8> [#uses=3]
+ %47 = icmp eq i8 %46, 0 ; <i1> [#uses=1]
+ br i1 %47, label %StringBeginsWith.exit88, label %bb.i80
+
+StringBeginsWith.exit88thread-split: ; preds = %bb2.i84
+ %.pr = load i8* %pp.0.i82 ; <i8> [#uses=1]
+ br label %StringBeginsWith.exit88
+
+StringBeginsWith.exit88: ; preds = %StringBeginsWith.exit88thread-split, %bb3.i85
+ %48 = phi i8 [ %.pr, %StringBeginsWith.exit88thread-split ], [ %46, %bb3.i85 ] ; <i8> [#uses=1]
+ %phitmp91 = icmp eq i8 %48, 0 ; <i1> [#uses=1]
+ br i1 %phitmp91, label %bb3.i77, label %bb2.i51
+
+bb2.i.i68: ; preds = %bb3.i77
+ br i1 false, label %bb2.i51, label %bb2.i75
+
+bb2.i75: ; preds = %bb2.i.i68
+ br label %bb3.i77
+
+bb3.i77: ; preds = %bb2.i75, %StringBeginsWith.exit88
+ %sp.0.i76 = getelementptr [512 x i8]* %buff, i32 0, i32 undef ; <i8*> [#uses=1]
+ %49 = load i8* %sp.0.i76, align 1 ; <i8> [#uses=1]
+ %50 = icmp eq i8 %49, 0 ; <i1> [#uses=1]
+ br i1 %50, label %bb24, label %bb2.i.i68
+
+bb24: ; preds = %bb3.i77
+ %51 = call arm_apcscc %struct.rec* @MakeWord(i32 11, i8* %41, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=0]
+ %52 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
+ %53 = zext i8 %52 to i32 ; <i32> [#uses=2]
+ %54 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %53 ; <%struct.rec**> [#uses=2]
+ %55 = load %struct.rec** %54, align 4 ; <%struct.rec*> [#uses=3]
+ %56 = icmp eq %struct.rec* %55, null ; <i1> [#uses=1]
+ br i1 %56, label %bb27, label %bb28
+
+bb27: ; preds = %bb24
+ br i1 undef, label %bb.i56, label %GetMemory.exit62
+
+bb.i56: ; preds = %bb27
+ br i1 undef, label %bb1.i58, label %bb2.i60
+
+bb1.i58: ; preds = %bb.i56
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ br label %bb2.i60
+
+bb2.i60: ; preds = %bb1.i58, %bb.i56
+ %.pre1.i59 = phi i8** [ undef, %bb1.i58 ], [ undef, %bb.i56 ] ; <i8**> [#uses=1]
+ store i8** undef, i8*** @top_free.4773, align 4
+ br label %GetMemory.exit62
+
+GetMemory.exit62: ; preds = %bb2.i60, %bb27
+ %57 = phi i8** [ %.pre1.i59, %bb2.i60 ], [ undef, %bb27 ] ; <i8**> [#uses=1]
+ %58 = getelementptr i8** %57, i32 %53 ; <i8**> [#uses=1]
+ store i8** %58, i8*** @next_free.4772, align 4
+ store %struct.rec* undef, %struct.rec** @zz_hold, align 4
+ br label %bb29
+
+bb28: ; preds = %bb24
+ store %struct.rec* %55, %struct.rec** @zz_hold, align 4
+ %59 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %59, %struct.rec** %54, align 4
+ br label %bb29
+
+bb29: ; preds = %bb28, %GetMemory.exit62
+ %.pre184 = phi %struct.rec* [ %55, %bb28 ], [ undef, %GetMemory.exit62 ] ; <%struct.rec*> [#uses=3]
+ store i8 0, i8* undef
+ store %struct.rec* %.pre184, %struct.rec** @xx_link, align 4
+ br i1 undef, label %bb35, label %bb31
+
+bb31: ; preds = %bb29
+ store %struct.rec* %.pre184, %struct.rec** undef
+ br label %bb35
+
+bb35: ; preds = %bb31, %bb29
+ br i1 undef, label %bb41, label %bb37
+
+bb37: ; preds = %bb35
+ %60 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %60, %struct.rec** undef
+ store %struct.rec* undef, %struct.rec** null
+ store %struct.rec* %.pre184, %struct.rec** null, align 4
+ br label %bb41
+
+bb41: ; preds = %bb37, %bb35
+ %61 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=1]
+ %62 = icmp eq i8* %61, null ; <i1> [#uses=1]
+ %iftmp.554.0 = select i1 %62, i32 2, i32 1 ; <i32> [#uses=1]
+ br label %bb100.outer
+
+bb.i47: ; preds = %bb3.i52
+ %63 = icmp eq i8 %64, %65 ; <i1> [#uses=1]
+ br i1 %63, label %bb2.i51, label %bb2.i41
+
+bb2.i51: ; preds = %bb.i47, %bb2.i.i68, %StringBeginsWith.exit88, %bb.i80
+ %pp.0.i49 = getelementptr [17 x i8]* @.str1872951, i32 0, i32 0 ; <i8*> [#uses=1]
+ %64 = load i8* null, align 1 ; <i8> [#uses=1]
+ br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
+
+bb3.i52: ; preds = %bb2.i51
+ %65 = load i8* %pp.0.i49, align 1 ; <i8> [#uses=1]
+ br i1 false, label %StringBeginsWith.exit55, label %bb.i47
+
+StringBeginsWith.exit55thread-split: ; preds = %bb2.i51
+ br label %StringBeginsWith.exit55
+
+StringBeginsWith.exit55: ; preds = %StringBeginsWith.exit55thread-split, %bb3.i52
+ br i1 false, label %bb49, label %bb2.i41
+
+bb49: ; preds = %StringBeginsWith.exit55
+ br label %bb2.i41
+
+bb2.i41: ; preds = %bb2.i41, %bb49, %StringBeginsWith.exit55, %bb.i47
+ br i1 false, label %bb2.i41, label %bb2.i.i15
+
+bb2.i.i15: ; preds = %bb2.i41
+ %pp.0.i.i13 = getelementptr [6 x i8]* @.str742838, i32 0, i32 0 ; <i8*> [#uses=1]
+ br i1 false, label %StringBeginsWith.exitthread-split.i18, label %bb3.i.i16
+
+bb3.i.i16: ; preds = %bb2.i.i15
+ %66 = load i8* %pp.0.i.i13, align 1 ; <i8> [#uses=1]
+ br label %StringBeginsWith.exit.i20
+
+StringBeginsWith.exitthread-split.i18: ; preds = %bb2.i.i15
+ br label %StringBeginsWith.exit.i20
+
+StringBeginsWith.exit.i20: ; preds = %StringBeginsWith.exitthread-split.i18, %bb3.i.i16
+ %67 = phi i8 [ undef, %StringBeginsWith.exitthread-split.i18 ], [ %66, %bb3.i.i16 ] ; <i8> [#uses=1]
+ %phitmp.i19 = icmp eq i8 %67, 0 ; <i1> [#uses=1]
+ br i1 %phitmp.i19, label %bb58, label %bb2.i6.i26
+
+bb2.i6.i26: ; preds = %bb2.i6.i26, %StringBeginsWith.exit.i20
+ %indvar.i3.i23 = phi i32 [ %indvar.next.i1.i21, %bb2.i6.i26 ], [ 0, %StringBeginsWith.exit.i20 ] ; <i32> [#uses=3]
+ %sp.0.i5.i25 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=0]
+ %pp.0.i4.i24 = getelementptr [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=1]
+ %68 = load i8* %pp.0.i4.i24, align 1 ; <i8> [#uses=0]
+ %indvar.next.i1.i21 = add i32 %indvar.i3.i23, 1 ; <i32> [#uses=1]
+ br i1 undef, label %bb2.i6.i26, label %bb55
+
+bb55: ; preds = %bb2.i6.i26
+ %69 = call arm_apcscc i32 @"\01_fputs"(i8* %buff14, %struct.FILE* undef) nounwind ; <i32> [#uses=0]
+ unreachable
+
+bb58: ; preds = %StringBeginsWith.exit.i20
+ %70 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
+ %iftmp.560.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
+ br label %bb100.outer
+
+bb.i7: ; preds = %bb3.i
+ br i1 false, label %bb2.i8, label %bb2.i.i
+
+bb2.i8: ; preds = %bb100.outer, %bb.i7
+ br i1 undef, label %StringBeginsWith.exitthread-split, label %bb3.i
+
+bb3.i: ; preds = %bb2.i8
+ br i1 undef, label %StringBeginsWith.exit, label %bb.i7
+
+StringBeginsWith.exitthread-split: ; preds = %bb2.i8
+ br label %StringBeginsWith.exit
+
+StringBeginsWith.exit: ; preds = %StringBeginsWith.exitthread-split, %bb3.i
+ %phitmp93 = icmp eq i8 undef, 0 ; <i1> [#uses=1]
+ br i1 %phitmp93, label %bb66, label %bb2.i.i
+
+bb66: ; preds = %StringBeginsWith.exit
+ %71 = call arm_apcscc %struct.rec* @MakeWord(i32 11, i8* undef, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=4]
+ %72 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
+ %73 = zext i8 %72 to i32 ; <i32> [#uses=2]
+ %74 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %73 ; <%struct.rec**> [#uses=2]
+ %75 = load %struct.rec** %74, align 4 ; <%struct.rec*> [#uses=3]
+ %76 = icmp eq %struct.rec* %75, null ; <i1> [#uses=1]
+ br i1 %76, label %bb69, label %bb70
+
+bb69: ; preds = %bb66
+ br i1 undef, label %bb.i2, label %GetMemory.exit
+
+bb.i2: ; preds = %bb69
+ %77 = call arm_apcscc noalias i8* @calloc(i32 1020, i32 4) nounwind ; <i8*> [#uses=1]
+ %78 = bitcast i8* %77 to i8** ; <i8**> [#uses=3]
+ store i8** %78, i8*** @next_free.4772, align 4
+ br i1 undef, label %bb1.i3, label %bb2.i4
+
+bb1.i3: ; preds = %bb.i2
+ call arm_apcscc void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ br label %bb2.i4
+
+bb2.i4: ; preds = %bb1.i3, %bb.i2
+ %.pre1.i = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1]
+ %79 = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1]
+ %80 = getelementptr i8** %79, i32 1020 ; <i8**> [#uses=1]
+ store i8** %80, i8*** @top_free.4773, align 4
+ br label %GetMemory.exit
+
+GetMemory.exit: ; preds = %bb2.i4, %bb69
+ %81 = phi i8** [ %.pre1.i, %bb2.i4 ], [ undef, %bb69 ] ; <i8**> [#uses=2]
+ %82 = bitcast i8** %81 to %struct.rec* ; <%struct.rec*> [#uses=3]
+ %83 = getelementptr i8** %81, i32 %73 ; <i8**> [#uses=1]
+ store i8** %83, i8*** @next_free.4772, align 4
+ store %struct.rec* %82, %struct.rec** @zz_hold, align 4
+ br label %bb71
+
+bb70: ; preds = %bb66
+ %84 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %84, %struct.rec** %74, align 4
+ br label %bb71
+
+bb71: ; preds = %bb70, %GetMemory.exit
+ %.pre185 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=8]
+ %85 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=1]
+ %86 = getelementptr %struct.rec* %85, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=0]
+ %87 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=0]
+ %88 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* %.pre185, %struct.rec** @xx_link, align 4
+ store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
+ %89 = load %struct.rec** @needs, align 4 ; <%struct.rec*> [#uses=2]
+ store %struct.rec* %89, %struct.rec** @zz_hold, align 4
+ br i1 false, label %bb77, label %bb73
+
+bb73: ; preds = %bb71
+ %90 = getelementptr %struct.rec* %89, i32 0, i32 0, i32 0, i32 0, i32 0 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+ store %struct.rec* %.pre185, %struct.rec** %90
+ store %struct.rec* %.pre185, %struct.rec** undef, align 4
+ br label %bb77
+
+bb77: ; preds = %bb73, %bb71
+ store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
+ store %struct.rec* %71, %struct.rec** @zz_hold, align 4
+ br i1 undef, label %bb83, label %bb79
+
+bb79: ; preds = %bb77
+ %91 = getelementptr %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+ %92 = load %struct.rec** %88, align 4 ; <%struct.rec*> [#uses=1]
+ store %struct.rec* %92, %struct.rec** %91
+ %93 = getelementptr %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=1]
+ store %struct.rec* %71, %struct.rec** %93, align 4
+ store %struct.rec* %.pre185, %struct.rec** undef, align 4
+ br label %bb83
+
+bb83: ; preds = %bb79, %bb77
+ br label %bb100.outer.outer
+
+bb.i.i: ; preds = %bb3.i.i
+ br i1 undef, label %bb2.i.i, label %bb2.i6.i
+
+bb2.i.i: ; preds = %bb.i.i, %StringBeginsWith.exit, %bb.i7
+ br i1 undef, label %StringBeginsWith.exitthread-split.i, label %bb3.i.i
+
+bb3.i.i: ; preds = %bb2.i.i
+ br i1 undef, label %StringBeginsWith.exit.i, label %bb.i.i
+
+StringBeginsWith.exitthread-split.i: ; preds = %bb2.i.i
+ br label %StringBeginsWith.exit.i
+
+StringBeginsWith.exit.i: ; preds = %StringBeginsWith.exitthread-split.i, %bb3.i.i
+ br i1 false, label %bb94, label %bb2.i6.i
+
+bb.i2.i: ; preds = %bb3.i7.i
+ br i1 false, label %bb2.i6.i, label %bb91
+
+bb2.i6.i: ; preds = %bb.i2.i, %StringBeginsWith.exit.i, %bb.i.i
+ br i1 undef, label %strip_out.exitthread-split, label %bb3.i7.i
+
+bb3.i7.i: ; preds = %bb2.i6.i
+ %94 = load i8* undef, align 1 ; <i8> [#uses=1]
+ br i1 undef, label %strip_out.exit, label %bb.i2.i
+
+strip_out.exitthread-split: ; preds = %bb2.i6.i
+ %.pr100 = load i8* undef ; <i8> [#uses=1]
+ br label %strip_out.exit
+
+strip_out.exit: ; preds = %strip_out.exitthread-split, %bb3.i7.i
+ %95 = phi i8 [ %.pr100, %strip_out.exitthread-split ], [ %94, %bb3.i7.i ] ; <i8> [#uses=0]
+ br i1 undef, label %bb94, label %bb91
+
+bb91: ; preds = %strip_out.exit, %bb.i2.i
+ unreachable
+
+bb94: ; preds = %strip_out.exit, %StringBeginsWith.exit.i
+ %96 = call arm_apcscc i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb100.outer: ; preds = %bb58, %bb41, %bb100.outer.outer
+ %state.0.ph = phi i32 [ %state.0.ph.ph, %bb100.outer.outer ], [ %iftmp.560.0, %bb58 ], [ %iftmp.554.0, %bb41 ] ; <i32> [#uses=1]
+ switch i32 %state.0.ph, label %bb2.i84 [
+ i32 2, label %bb101.split
+ i32 1, label %bb2.i8
+ ]
+
+bb101.split: ; preds = %bb100.outer
+ %97 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
+ br i1 %97, label %bb103, label %bb102
+
+bb102: ; preds = %bb101.split
+ %98 = call arm_apcscc i32 @remove(i8* getelementptr ([9 x i8]* @.str19294, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ unreachable
+
+bb103: ; preds = %bb101.split
+ %99 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %100 = call arm_apcscc i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ store i32 0, i32* @wordcount, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
new file mode 100644
index 0000000..3cbb212
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | grep fcpys | count 1
+; rdar://7117307
+
+ %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+ %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+ %struct.Patient = type { i32, i32, i32, %struct.Village* }
+ %struct.Results = type { float, float, float }
+ %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc void @get_results(%struct.Results* noalias nocapture sret %agg.result, %struct.Village* %village) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb6.preheader
+
+bb6.preheader: ; preds = %entry
+ call void @llvm.memcpy.i32(i8* undef, i8* undef, i32 12, i32 4)
+ br i1 undef, label %bb15, label %bb13
+
+bb: ; preds = %entry
+ ret void
+
+bb13: ; preds = %bb13, %bb6.preheader
+ %0 = fadd float undef, undef ; <float> [#uses=1]
+ %1 = fadd float undef, 1.000000e+00 ; <float> [#uses=1]
+ br i1 undef, label %bb15, label %bb13
+
+bb15: ; preds = %bb13, %bb6.preheader
+ %r1.0.0.lcssa = phi float [ 0.000000e+00, %bb6.preheader ], [ %1, %bb13 ] ; <float> [#uses=1]
+ %r1.1.0.lcssa = phi float [ undef, %bb6.preheader ], [ %0, %bb13 ] ; <float> [#uses=0]
+ store float %r1.0.0.lcssa, float* undef, align 4
+ ret void
+}
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
new file mode 100644
index 0000000..acf562c
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; rdar://7117307
+
+ %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+ %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+ %struct.Patient = type { i32, i32, i32, %struct.Village* }
+ %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.List* @sim(%struct.Village* %village) nounwind {
+entry:
+ br i1 undef, label %bb14, label %bb3.preheader
+
+bb3.preheader: ; preds = %entry
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb3.preheader
+ br i1 undef, label %bb11, label %bb5
+
+bb11: ; preds = %bb5
+ %0 = fmul float undef, 0x41E0000000000000 ; <float> [#uses=1]
+ %1 = fptosi float %0 to i32 ; <i32> [#uses=1]
+ store i32 %1, i32* undef, align 4
+ br i1 undef, label %generate_patient.exit, label %generate_patient.exit.thread
+
+generate_patient.exit.thread: ; preds = %bb11
+ ret %struct.List* null
+
+generate_patient.exit: ; preds = %bb11
+ br i1 undef, label %bb14, label %bb12
+
+bb12: ; preds = %generate_patient.exit
+ br i1 undef, label %bb.i, label %bb1.i
+
+bb.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb1.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb14: ; preds = %generate_patient.exit, %entry
+ ret %struct.List* undef
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
new file mode 100644
index 0000000..3ada026
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; rdar://7117307
+
+ %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+ %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+ %struct.Patient = type { i32, i32, i32, %struct.Village* }
+ %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.List* @sim(%struct.Village* %village) nounwind {
+entry:
+ br i1 undef, label %bb14, label %bb3.preheader
+
+bb3.preheader: ; preds = %entry
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb3.preheader
+ br i1 undef, label %bb11, label %bb5
+
+bb11: ; preds = %bb5
+ %0 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %1 = xor i32 %0, 123459876 ; <i32> [#uses=1]
+ %2 = sdiv i32 %1, 127773 ; <i32> [#uses=1]
+ %3 = mul i32 %2, 2836 ; <i32> [#uses=1]
+ %4 = sub i32 0, %3 ; <i32> [#uses=1]
+ %5 = xor i32 %4, 123459876 ; <i32> [#uses=1]
+ %idum_addr.0.i.i = select i1 undef, i32 undef, i32 %5 ; <i32> [#uses=1]
+ %6 = sitofp i32 %idum_addr.0.i.i to double ; <double> [#uses=1]
+ %7 = fmul double %6, 0x3E00000000200000 ; <double> [#uses=1]
+ %8 = fptrunc double %7 to float ; <float> [#uses=2]
+ %9 = fmul float %8, 0x41E0000000000000 ; <float> [#uses=1]
+ %10 = fptosi float %9 to i32 ; <i32> [#uses=1]
+ store i32 %10, i32* undef, align 4
+ %11 = fpext float %8 to double ; <double> [#uses=1]
+ %12 = fcmp ogt double %11, 6.660000e-01 ; <i1> [#uses=1]
+ br i1 %12, label %generate_patient.exit, label %generate_patient.exit.thread
+
+generate_patient.exit.thread: ; preds = %bb11
+ ret %struct.List* null
+
+generate_patient.exit: ; preds = %bb11
+ br i1 undef, label %bb14, label %bb12
+
+bb12: ; preds = %generate_patient.exit
+ br i1 undef, label %bb.i, label %bb1.i
+
+bb.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb1.i: ; preds = %bb12
+ ret %struct.List* null
+
+bb14: ; preds = %generate_patient.exit, %entry
+ ret %struct.List* undef
+}
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
new file mode 100644
index 0000000..03f9fac
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
+; PR4659
+; PR4682
+
+define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
+entry:
+; CHECK: __gcov_execlp:
+; CHECK: mov sp, r7
+; CHECK: sub sp, #1 * 4
+ call arm_aapcscc void @__gcov_flush() nounwind
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb5, label %bb
+
+bb5: ; preds = %bb, %entry
+ %0 = alloca i8*, i32 undef, align 4 ; <i8**> [#uses=1]
+ %1 = call arm_aapcscc i32 @execvp(i8* %path, i8** %0) nounwind ; <i32> [#uses=1]
+ ret i32 %1
+}
+
+declare hidden arm_aapcscc void @__gcov_flush()
+
+declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
new file mode 100644
index 0000000..93f5a0f
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2
+; PR4686
+
+ %a = type { i32 (...)** }
+ %b = type { %a }
+ %c = type { float, float, float, float }
+
+declare arm_aapcs_vfpcc float @bar(%c*)
+
+define arm_aapcs_vfpcc void @foo(%b* %x, %c* %y) {
+entry:
+ %0 = call arm_aapcs_vfpcc float @bar(%c* %y) ; <float> [#uses=0]
+ %1 = fadd float undef, undef ; <float> [#uses=1]
+ store float %1, float* undef, align 8
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
new file mode 100644
index 0000000..090ed2d
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -arm-use-neon-fp
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
+ %struct.JQUANT_TBL = type { [64 x i16], i32 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.anon = type { [8 x i32], [48 x i8] }
+ %struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
+ %struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
+ %struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
+ %struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
+ %struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
+ %struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
+ %struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
+ %struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
+ %struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
+ %struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
+ %struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+ %struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
+ %struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
+ %struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
+ %struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
+ %struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
+ %struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
+ %struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+ %struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
+ %struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
+
+define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %1 = fmul float undef, %0 ; <float> [#uses=2]
+ %tmp73 = add i32 0, 224 ; <i32> [#uses=1]
+ %scevgep74 = getelementptr i8* null, i32 %tmp73 ; <i8*> [#uses=1]
+ %scevgep7475 = bitcast i8* %scevgep74 to float* ; <float*> [#uses=1]
+ %2 = load float* null, align 4 ; <float> [#uses=1]
+ %3 = fmul float 0.000000e+00, %2 ; <float> [#uses=2]
+ %4 = fadd float %1, %3 ; <float> [#uses=1]
+ %5 = fsub float %1, %3 ; <float> [#uses=2]
+ %6 = fadd float undef, 0.000000e+00 ; <float> [#uses=2]
+ %7 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %8 = fsub float %7, %6 ; <float> [#uses=2]
+ %9 = fsub float %4, %6 ; <float> [#uses=1]
+ %10 = fadd float %5, %8 ; <float> [#uses=2]
+ %11 = fsub float %5, %8 ; <float> [#uses=1]
+ %12 = sitofp i16 undef to float ; <float> [#uses=1]
+ %13 = fmul float %12, 0.000000e+00 ; <float> [#uses=2]
+ %14 = sitofp i16 undef to float ; <float> [#uses=1]
+ %15 = load float* %scevgep7475, align 4 ; <float> [#uses=1]
+ %16 = fmul float %14, %15 ; <float> [#uses=2]
+ %17 = fadd float undef, undef ; <float> [#uses=2]
+ %18 = fadd float %13, %16 ; <float> [#uses=2]
+ %19 = fsub float %13, %16 ; <float> [#uses=1]
+ %20 = fadd float %18, %17 ; <float> [#uses=2]
+ %21 = fsub float %18, %17 ; <float> [#uses=1]
+ %22 = fmul float %21, 0x3FF6A09E60000000 ; <float> [#uses=1]
+ %23 = fmul float undef, 0x3FFD906BC0000000 ; <float> [#uses=2]
+ %24 = fmul float %19, 0x3FF1517A80000000 ; <float> [#uses=1]
+ %25 = fsub float %24, %23 ; <float> [#uses=1]
+ %26 = fadd float undef, %23 ; <float> [#uses=1]
+ %27 = fsub float %26, %20 ; <float> [#uses=3]
+ %28 = fsub float %22, %27 ; <float> [#uses=2]
+ %29 = fadd float %25, %28 ; <float> [#uses=1]
+ %30 = fadd float undef, %20 ; <float> [#uses=1]
+ store float %30, float* undef, align 4
+ %31 = fadd float %10, %27 ; <float> [#uses=1]
+ store float %31, float* undef, align 4
+ %32 = fsub float %10, %27 ; <float> [#uses=1]
+ store float %32, float* undef, align 4
+ %33 = fadd float %11, %28 ; <float> [#uses=1]
+ store float %33, float* undef, align 4
+ %34 = fsub float %9, %29 ; <float> [#uses=1]
+ store float %34, float* undef, align 4
+ br label %bb
+}
diff --git a/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
new file mode 100644
index 0000000..a0f9918
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2
+; PR4686
+
+@g_d = external global double ; <double*> [#uses=1]
+
+define arm_aapcscc void @foo(float %yIncr) {
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc float @bar() ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ store double %1, double* @g_d, align 8
+ br label %bb4
+
+bb4: ; preds = %bb, %entry
+ unreachable
+}
+
+declare arm_aapcs_vfpcc float @bar()
diff --git a/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
new file mode 100644
index 0000000..cbe250b
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+vfp2
+
+define arm_apcscc float @t1(i32 %v0) nounwind {
+entry:
+ store i32 undef, i32* undef, align 4
+ %0 = load [4 x i8]** undef, align 4 ; <[4 x i8]*> [#uses=1]
+ %1 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %2 = zext i8 %1 to i32 ; <i32> [#uses=1]
+ %3 = getelementptr [4 x i8]* %0, i32 %v0, i32 0 ; <i8*> [#uses=1]
+ %4 = load i8* %3, align 1 ; <i8> [#uses=1]
+ %5 = zext i8 %4 to i32 ; <i32> [#uses=1]
+ %6 = sub i32 %5, %2 ; <i32> [#uses=1]
+ %7 = sitofp i32 %6 to float ; <float> [#uses=1]
+ ret float %7
+}
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
new file mode 100644
index 0000000..e84e867
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+
+%struct.pix_pos = type { i32, i32, i32, i32, i32, i32 }
+
+@getNeighbour = external global void (i32, i32, i32, i32, %struct.pix_pos*)*, align 4 ; <void (i32, i32, i32, i32, %struct.pix_pos*)**> [#uses=2]
+
+define arm_apcscc void @t() nounwind {
+; CHECK: t:
+; CHECK: ittt eq
+; CHECK-NEXT: addeq
+; CHECK-NEXT: movweq
+; CHECK-NEXT: movteq
+entry:
+ %pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2]
+ br i1 undef, label %land.rhs, label %lor.end
+
+land.rhs: ; preds = %entry
+ br label %lor.end
+
+lor.end: ; preds = %land.rhs, %entry
+ switch i32 0, label %if.end371 [
+ i32 10, label %if.then366
+ i32 14, label %if.then366
+ ]
+
+if.then366: ; preds = %lor.end, %lor.end
+ unreachable
+
+if.end371: ; preds = %lor.end
+ %arrayidx56.2.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 2 ; <%struct.pix_pos*> [#uses=1]
+ %arrayidx56.3.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 3 ; <%struct.pix_pos*> [#uses=1]
+ br i1 undef, label %for.body1857, label %for.end4557
+
+for.body1857: ; preds = %if.end371
+ br i1 undef, label %if.then1867, label %for.cond1933
+
+if.then1867: ; preds = %for.body1857
+ unreachable
+
+for.cond1933: ; preds = %for.body1857
+ br i1 undef, label %for.body1940, label %if.then4493
+
+for.body1940: ; preds = %for.cond1933
+ %shl = shl i32 undef, 2 ; <i32> [#uses=1]
+ %shl1959 = shl i32 undef, 2 ; <i32> [#uses=4]
+ br i1 undef, label %if.then1992, label %if.else2003
+
+if.then1992: ; preds = %for.body1940
+ %tmp14.i302 = load i32* undef ; <i32> [#uses=4]
+ %add.i307452 = or i32 %shl1959, 1 ; <i32> [#uses=1]
+ %sub.i308 = add i32 %shl, -1 ; <i32> [#uses=4]
+ call arm_apcscc void undef(i32 %tmp14.i302, i32 %sub.i308, i32 %shl1959, i32 0, %struct.pix_pos* undef) nounwind
+ %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+ call arm_apcscc void %tmp49.i309(i32 %tmp14.i302, i32 %sub.i308, i32 %add.i307452, i32 0, %struct.pix_pos* null) nounwind
+ %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+ call arm_apcscc void %tmp49.1.i(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.2.i) nounwind
+ call arm_apcscc void undef(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.3.i) nounwind
+ unreachable
+
+if.else2003: ; preds = %for.body1940
+ switch i32 undef, label %if.then2015 [
+ i32 10, label %if.then4382
+ i32 14, label %if.then4382
+ ]
+
+if.then2015: ; preds = %if.else2003
+ br i1 undef, label %if.else2298, label %if.then2019
+
+if.then2019: ; preds = %if.then2015
+ br i1 undef, label %if.then2065, label %if.else2081
+
+if.then2065: ; preds = %if.then2019
+ br label %if.end2128
+
+if.else2081: ; preds = %if.then2019
+ br label %if.end2128
+
+if.end2128: ; preds = %if.else2081, %if.then2065
+ unreachable
+
+if.else2298: ; preds = %if.then2015
+ br i1 undef, label %land.lhs.true2813, label %cond.end2841
+
+land.lhs.true2813: ; preds = %if.else2298
+ br i1 undef, label %cond.end2841, label %cond.true2824
+
+cond.true2824: ; preds = %land.lhs.true2813
+ br label %cond.end2841
+
+cond.end2841: ; preds = %cond.true2824, %land.lhs.true2813, %if.else2298
+ br i1 undef, label %for.cond2882.preheader, label %for.cond2940.preheader
+
+for.cond2882.preheader: ; preds = %cond.end2841
+ %mul3693 = shl i32 undef, 1 ; <i32> [#uses=2]
+ br i1 undef, label %if.then3689, label %if.else3728
+
+for.cond2940.preheader: ; preds = %cond.end2841
+ br label %for.inc3040
+
+for.inc3040: ; preds = %for.inc3040, %for.cond2940.preheader
+ br label %for.inc3040
+
+if.then3689: ; preds = %for.cond2882.preheader
+ %add3695 = add nsw i32 %mul3693, %shl1959 ; <i32> [#uses=1]
+ %mul3697 = shl i32 %add3695, 2 ; <i32> [#uses=2]
+ %arrayidx3705 = getelementptr inbounds i16* undef, i32 1 ; <i16*> [#uses=1]
+ %tmp3706 = load i16* %arrayidx3705 ; <i16> [#uses=1]
+ %conv3707 = sext i16 %tmp3706 to i32 ; <i32> [#uses=1]
+ %add3708 = add nsw i32 %conv3707, %mul3697 ; <i32> [#uses=1]
+ %arrayidx3724 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
+ %tmp3725 = load i16* %arrayidx3724 ; <i16> [#uses=1]
+ %conv3726 = sext i16 %tmp3725 to i32 ; <i32> [#uses=1]
+ %add3727 = add nsw i32 %conv3726, %mul3697 ; <i32> [#uses=1]
+ br label %if.end3770
+
+if.else3728: ; preds = %for.cond2882.preheader
+ %mul3733 = add i32 %shl1959, 1073741816 ; <i32> [#uses=1]
+ %add3735 = add nsw i32 %mul3733, %mul3693 ; <i32> [#uses=1]
+ %mul3737 = shl i32 %add3735, 2 ; <i32> [#uses=2]
+ %tmp3746 = load i16* undef ; <i16> [#uses=1]
+ %conv3747 = sext i16 %tmp3746 to i32 ; <i32> [#uses=1]
+ %add3748 = add nsw i32 %conv3747, %mul3737 ; <i32> [#uses=1]
+ %arrayidx3765 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
+ %tmp3766 = load i16* %arrayidx3765 ; <i16> [#uses=1]
+ %conv3767 = sext i16 %tmp3766 to i32 ; <i32> [#uses=1]
+ %add3768 = add nsw i32 %conv3767, %mul3737 ; <i32> [#uses=1]
+ br label %if.end3770
+
+if.end3770: ; preds = %if.else3728, %if.then3689
+ %vec2_y.1 = phi i32 [ %add3727, %if.then3689 ], [ %add3768, %if.else3728 ] ; <i32> [#uses=0]
+ %vec1_y.2 = phi i32 [ %add3708, %if.then3689 ], [ %add3748, %if.else3728 ] ; <i32> [#uses=0]
+ unreachable
+
+if.then4382: ; preds = %if.else2003, %if.else2003
+ switch i32 undef, label %if.then4394 [
+ i32 10, label %if.else4400
+ i32 14, label %if.else4400
+ ]
+
+if.then4394: ; preds = %if.then4382
+ unreachable
+
+if.else4400: ; preds = %if.then4382, %if.then4382
+ br label %for.cond4451.preheader
+
+for.cond4451.preheader: ; preds = %for.cond4451.preheader, %if.else4400
+ br label %for.cond4451.preheader
+
+if.then4493: ; preds = %for.cond1933
+ unreachable
+
+for.end4557: ; preds = %if.end371
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
index 3450c5a..de6f6e2 100644
--- a/test/CodeGen/Thumb2/carry.ll
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -1,15 +1,21 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "subs r" | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "adc r"
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "sbc r" | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
entry:
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
%tmp = sub i64 %a, %b
ret i64 %tmp
}
define i64 @f2(i64 %a, i64 %b) {
entry:
+; CHECK: f2:
+; CHECK: adds r0, r0, r0
+; CHECK: adcs r1, r1
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
%tmp1 = shl i64 %a, 1
%tmp2 = sub i64 %tmp1, %b
ret i64 %tmp2
diff --git a/test/CodeGen/Thumb2/frameless.ll b/test/CodeGen/Thumb2/frameless.ll
new file mode 100644
index 0000000..c3c8cf1
--- /dev/null
+++ b/test/CodeGen/Thumb2/frameless.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep mov
+; RUN: llc < %s -mtriple=thumbv7-linux -disable-fp-elim | not grep mov
+
+define arm_apcscc void @t() nounwind readnone {
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/frameless2.ll b/test/CodeGen/Thumb2/frameless2.ll
new file mode 100644
index 0000000..7cc7b19
--- /dev/null
+++ b/test/CodeGen/Thumb2/frameless2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep r7
+
+%struct.noise3 = type { [3 x [17 x i32]] }
+%struct.noiseguard = type { i32, i32, i32 }
+
+define arm_apcscc void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
+entry:
+ %0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1]
+ %1 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %1, i32* undef, align 4
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
new file mode 100644
index 0000000..865b17b
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define void @test1() {
+; CHECK: test1:
+; CHECK: sub sp, #64 * 4
+ %tmp = alloca [ 64 x i32 ] , align 4
+ ret void
+}
+
+define void @test2() {
+; CHECK: test2:
+; CHECK: sub.w sp, sp, #4160
+; CHECK: sub sp, #2 * 4
+ %tmp = alloca [ 4168 x i8 ] , align 4
+ ret void
+}
+
+define i32 @test3() {
+; CHECK: test3:
+; CHECK: sub.w sp, sp, #805306368
+; CHECK: sub sp, #4 * 4
+ %retval = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %a = alloca [805306369 x i8], align 16
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
index 1b1fe7b..4fd4525 100644
--- a/test/CodeGen/Thumb2/load-global.ll
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -1,19 +1,23 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-apple-darwin -relocation-model=static | \
-; RUN: not grep {L_G\$non_lazy_ptr}
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | \
-; RUN: grep {L_G\$non_lazy_ptr} | count 2
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | \
-; RUN: grep {ldr.*pc} | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | \
-; RUN: grep {GOT} | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
@G = external global i32
define i32 @test1() {
+; STATIC: _test1:
+; STATIC: .long _G
+
+; DYNAMIC: _test1:
+; DYNAMIC: .long L_G$non_lazy_ptr
+
+; PIC: _test1
+; PIC: add r0, pc
+; PIC: .long L_G$non_lazy_ptr-(LPC0+4)
+
+; LINUX: test1
+; LINUX: .long G(GOT)
%tmp = load i32* @G
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/mul_const.ll b/test/CodeGen/Thumb2/mul_const.ll
new file mode 100644
index 0000000..9a2ec93
--- /dev/null
+++ b/test/CodeGen/Thumb2/mul_const.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; rdar://7069502
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add.w r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 9
+ ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 7
+ ret i32 %0
+}
diff --git a/test/CodeGen/Thumb2/pic-load.ll b/test/CodeGen/Thumb2/pic-load.ll
new file mode 100644
index 0000000..1f8aea9
--- /dev/null
+++ b/test/CodeGen/Thumb2/pic-load.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -relocation-model=pic | FileCheck %s
+
+ %struct.anon = type { void ()* }
+ %struct.one_atexit_routine = type { %struct.anon, i32, i8* }
+@__dso_handle = external global { } ; <{ }*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (void ()*)* @atexit to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define hidden arm_apcscc i32 @atexit(void ()* %func) nounwind {
+entry:
+; CHECK: atexit:
+; CHECK: add r0, pc
+ %r = alloca %struct.one_atexit_routine, align 4 ; <%struct.one_atexit_routine*> [#uses=3]
+ %0 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0 ; <void ()**> [#uses=1]
+ store void ()* %func, void ()** %0, align 4
+ %1 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 0, i32* %1, align 4
+ %2 = call arm_apcscc i32 @atexit_common(%struct.one_atexit_routine* %r, i8* bitcast ({ }* @__dso_handle to i8*)) nounwind ; <i32> [#uses=1]
+ ret i32 %2
+}
+
+declare arm_apcscc i32 @atexit_common(%struct.one_atexit_routine*, i8*) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-adc.ll b/test/CodeGen/Thumb2/thumb2-adc.ll
index c1565b3..702df91 100644
--- a/test/CodeGen/Thumb2/thumb2-adc.ll
+++ b/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -1,32 +1,48 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adc\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 734439407618 = 0x000000ab00000002
define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 734439407618
ret i64 %tmp
}
; 5066626890203138 = 0x0012001200000002
define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 5066626890203138
ret i64 %tmp
}
; 3747052064576897026 = 0x3400340000000002
define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 3747052064576897026
ret i64 %tmp
}
; 6221254862626095106 = 0x5656565600000002
define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 6221254862626095106
ret i64 %tmp
}
; 287104476244869122 = 0x03fc000000000002
define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: adds r0, #2
%tmp = add i64 %a, 287104476244869122
ret i64 %tmp
}
+define i64 @f6(i64 %a, i64 %b) {
+; CHECK: f6:
+; CHECK: adds r0, r0, r2
+ %tmp = add i64 %a, %b
+ ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index d4f408f..d42ea71 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #255
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #256
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #257
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4094
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4095
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4096
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #255
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #256
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #257
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4094
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4095
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4096
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
define i32 @t2ADDrc_255(i32 %lhs) {
%Rd = add i32 %lhs, 255;
diff --git a/test/CodeGen/Thumb2/thumb2-add2.ll b/test/CodeGen/Thumb2/thumb2-add2.ll
index be89508..e496654 100644
--- a/test/CodeGen/Thumb2/thumb2-add2.ll
+++ b/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #171
%tmp = add i32 %a, 171
ret i32 %tmp
}
; 1179666 = 0x00120012
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: add.w r0, r0, #1179666
%tmp = add i32 %a, 1179666
ret i32 %tmp
}
; 872428544 = 0x34003400
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: add.w r0, r0, #872428544
%tmp = add i32 %a, 872428544
ret i32 %tmp
}
; 1448498774 = 0x56565656
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: add.w r0, r0, #1448498774
%tmp = add i32 %a, 1448498774
ret i32 %tmp
}
; 510 = 0x000001fe
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: add.w r0, r0, #510
%tmp = add i32 %a, 510
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll
index 1e6341e..8d472cb 100644
--- a/test/CodeGen/Thumb2/thumb2-add3.ll
+++ b/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
define i32 @f1(i32 %a) {
%tmp = add i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-add4.ll b/test/CodeGen/Thumb2/thumb2-add4.ll
index b74a33c..b94e84d 100644
--- a/test/CodeGen/Thumb2/thumb2-add4.ll
+++ b/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -1,31 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #171
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 171
ret i64 %tmp
}
; 1179666 = 0x00120012
define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: adds.w r0, r0, #1179666
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 1179666
ret i64 %tmp
}
; 872428544 = 0x34003400
define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: adds.w r0, r0, #872428544
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 872428544
ret i64 %tmp
}
; 1448498774 = 0x56565656
define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: adds.w r0, r0, #1448498774
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 1448498774
ret i64 %tmp
}
; 66846720 = 0x03fc0000
define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: adds.w r0, r0, #66846720
+; CHECK: adc r1, r1, #0
%tmp = add i64 %a, 66846720
ret i64 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-add5.ll b/test/CodeGen/Thumb2/thumb2-add5.ll
index 2245214..8b3a4f6 100644
--- a/test/CodeGen/Thumb2/thumb2-add5.ll
+++ b/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: add r0, r1
%tmp = add i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: add.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = add i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: add.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = add i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: add.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = add i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: add.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-add6.ll b/test/CodeGen/Thumb2/thumb2-add6.ll
index 9dd3efc..0ecaa79 100644
--- a/test/CodeGen/Thumb2/thumb2-add6.ll
+++ b/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: adds r0, r0, r2
+; CHECK: adcs r1, r3
%tmp = add i64 %a, %b
ret i64 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-and.ll b/test/CodeGen/Thumb2/thumb2-and.ll
index ab191d5..8e2245a 100644
--- a/test/CodeGen/Thumb2/thumb2-and.ll
+++ b/test/CodeGen/Thumb2/thumb2-and.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: ands r0, r1
%tmp = and i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: and.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: and.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: and.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: and.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 266d256..1e2666f 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-asr.ll b/test/CodeGen/Thumb2/thumb2-asr.ll
index 4edf92b..a0a60e6 100644
--- a/test/CodeGen/Thumb2/thumb2-asr.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: asrs r0, r1
%tmp = ashr i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-asr2.ll b/test/CodeGen/Thumb2/thumb2-asr2.ll
index 7007948..9c8634f 100644
--- a/test/CodeGen/Thumb2/thumb2-asr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#17} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: asrs r0, r0, #17
%tmp = ashr i32 %a, 17
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
new file mode 100644
index 0000000..e1f9cdb
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep it
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) {
+; CHECK: t1
+; CHECK: beq
+ %tmp2 = icmp eq i32 %a, 0
+ br i1 %tmp2, label %cond_false, label %cond_true
+
+cond_true:
+ %tmp5 = add i32 %b, 1
+ %tmp6 = and i32 %tmp5, %c
+ ret i32 %tmp6
+
+cond_false:
+ %tmp7 = add i32 %b, -1
+ %tmp8 = xor i32 %tmp7, %c
+ ret i32 %tmp8
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
index 1e5016c..d33cf7e 100644
--- a/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
; 4278190095 = 0xff00000f
define i32 @f1(i32 %a) {
@@ -17,3 +17,9 @@ define i32 @f3(i32 %a) {
%tmp = and i32 %a, 4095
ret i32 %tmp
}
+
+; 2147483646 = 0x7ffffffe, not implementable with BFC
+define i32 @f4(i32 %a) {
+ %tmp = and i32 %a, 2147483646
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bic.ll b/test/CodeGen/Thumb2/thumb2-bic.ll
index f5a3d20..4e35383 100644
--- a/test/CodeGen/Thumb2/thumb2-bic.ll
+++ b/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -1,34 +1,40 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: bics r0, r1
%tmp = xor i32 %b, 4294967295
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: bics r0, r1
%tmp = xor i32 %b, 4294967295
%tmp1 = and i32 %tmp, %a
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: bics r0, r1
%tmp = xor i32 4294967295, %b
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: bics r0, r1
%tmp = xor i32 4294967295, %b
%tmp1 = and i32 %tmp, %a
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: bic.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = xor i32 4294967295, %tmp
%tmp2 = and i32 %a, %tmp1
@@ -36,6 +42,8 @@ define i32 @f5(i32 %a, i32 %b) {
}
define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: bic.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = xor i32 %tmp, 4294967295
%tmp2 = and i32 %tmp1, %a
@@ -43,6 +51,8 @@ define i32 @f6(i32 %a, i32 %b) {
}
define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: bic.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = xor i32 %tmp, 4294967295
%tmp2 = and i32 %a, %tmp1
@@ -50,6 +60,8 @@ define i32 @f7(i32 %a, i32 %b) {
}
define i32 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: bic.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
@@ -57,3 +69,37 @@ define i32 @f8(i32 %a, i32 %b) {
%tmp2 = and i32 %tmp1, %a
ret i32 %tmp2
}
+
+; ~0x000000bb = 4294967108
+define i32 @f9(i32 %a) {
+ %tmp = and i32 %a, 4294967108
+ ret i32 %tmp
+
+; CHECK: f9:
+; CHECK: bic r0, r0, #187
+}
+
+; ~0x00aa00aa = 4283826005
+define i32 @f10(i32 %a) {
+ %tmp = and i32 %a, 4283826005
+ ret i32 %tmp
+
+; CHECK: f10:
+; CHECK: bic r0, r0, #11141290
+}
+
+; ~0xcc00cc00 = 872363007
+define i32 @f11(i32 %a) {
+ %tmp = and i32 %a, 872363007
+ ret i32 %tmp
+; CHECK: f11:
+; CHECK: bic r0, r0, #-872363008
+}
+
+; ~0x00110000 = 4293853183
+define i32 @f12(i32 %a) {
+ %tmp = and i32 %a, 4293853183
+ ret i32 %tmp
+; CHECK: f12:
+; CHECK: bic r0, r0, #1114112
+}
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
new file mode 100644
index 0000000..b46cb5f
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define void @f1(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f1:
+; CHECK: bne LBB
+ %tmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @f2(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f2:
+; CHECK: bge LBB
+ %tmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @f3(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f3:
+; CHECK: bhs LBB
+ %tmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @f4(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f4:
+; CHECK: blo LBB
+ %tmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp, label %return, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 0, i32* %v
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-call.ll b/test/CodeGen/Thumb2/thumb2-call.ll
new file mode 100644
index 0000000..7dc6b26
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-call.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() {
+; DARWIN: f:
+; DARWIN: blx _g
+
+; LINUX: f:
+; LINUX: bl g
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @h() {
+; DARWIN: h:
+; DARWIN: blx r0
+
+; LINUX: h:
+; LINUX: blx r0
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index e5f94a6..0bed058 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
define i32 @f1(i32 %a) {
%tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index ffe8b98..401c56a 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i1 @f1(i32 %a, i32 %b) {
%nb = sub i32 0, %b
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
index 9763dea..c1fcac0 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "cmn " | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "cmn\\.w " | grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
; -0x000000bb = 4294967109
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 63f20cd..d4773bb 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*#\[0-9\]*$} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 0x000000bb = 187
define i1 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: cmp r0, #187
%tmp = icmp ne i32 %a, 187
ret i1 %tmp
}
; 0x00aa00aa = 11141290
define i1 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: cmp.w r0, #11141290
%tmp = icmp eq i32 %a, 11141290
ret i1 %tmp
}
; 0xcc00cc00 = 3422604288
define i1 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: cmp.w r0, #-872363008
%tmp = icmp ne i32 %a, 3422604288
ret i1 %tmp
}
; 0xdddddddd = 3722304989
define i1 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: cmp.w r0, #-572662307
%tmp = icmp ne i32 %a, 3722304989
ret i1 %tmp
}
; 0x00110000 = 1114112
define i1 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: cmp.w r0, #1114112
%tmp = icmp eq i32 %a, 1114112
ret i1 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 368a3b3..55c321d 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: cmp r0, r1
%tmp = icmp ne i32 %a, %b
ret i1 %tmp
}
define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: cmp r0, r1
%tmp = icmp eq i32 %a, %b
ret i1 %tmp
}
define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: cmp.w r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = icmp eq i32 %tmp, %a
ret i1 %tmp1
}
define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: cmp.w r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = icmp ne i32 %tmp, %a
ret i1 %tmp1
}
define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: cmp.w r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = icmp eq i32 %a, %tmp
ret i1 %tmp1
}
define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: cmp.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-eor.ll b/test/CodeGen/Thumb2/thumb2-eor.ll
index 56bb46a..b7e2766 100644
--- a/test/CodeGen/Thumb2/thumb2-eor.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: eors r0, r1
%tmp = xor i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: eor.w r0, r1, r0
%tmp = xor i32 %b, %a
ret i32 %tmp
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: eor.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = xor i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: eor.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = xor i32 %tmp, %a
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: eor.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = xor i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: eor.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
index 11784ca..185634c 100644
--- a/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "eor " | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "eor " | grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 5
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
new file mode 100644
index 0000000..71199ab
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: cmpne
+ switch i32 %c, label %cond_next [
+ i32 1, label %cond_true
+ i32 7, label %cond_true
+ ]
+
+cond_true:
+ %tmp12 = add i32 %a, 1
+ %tmp1518 = add i32 %tmp12, %b
+ ret i32 %tmp1518
+
+cond_next:
+ %tmp15 = add i32 %b, %a
+ ret i32 %tmp15
+}
+
+; FIXME: Check for the number of unconditional branches after adding post-ifcvt branch folding.
+define i32 @t2(i32 %a, i32 %b) {
+entry:
+; CHECK: t2:
+; CHECK: ite le
+; CHECK: suble
+; CHECK: subgt
+ %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer: ; preds = %cond_false, %entry
+ %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
+ %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %bb.outer
+ %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
+ %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
+ %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
+ %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
+ br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %bb
+ %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
+ %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp1437, label %bb17, label %bb
+
+cond_false: ; preds = %bb
+ %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
+ %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17: ; preds = %cond_false, %cond_true, %entry
+ %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %a_addr.026.1
+}
+
+@x = external global i32* ; <i32**> [#uses=1]
+
+define void @foo(i32 %a) {
+entry:
+ %tmp = load i32** @x ; <i32*> [#uses=1]
+ store i32 %a, i32* %tmp
+ ret void
+}
+
+define void @t3(i32 %a, i32 %b) {
+entry:
+; CHECK: t3:
+; CHECK: it lt
+; CHECK: poplt {r7, pc}
+ %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ tail call void @foo( i32 %b )
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
new file mode 100644
index 0000000..d917ffe
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define void @foo(i32 %X, i32 %Y) {
+entry:
+; CHECK: foo:
+; CHECK: it ne
+; CHECK: cmpne
+; CHECK: it hi
+; CHECK: pophi {r7, pc}
+ %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
+ %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
+ %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
+ br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare i32 @bar(...)
+
+; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
+
+ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+entry:
+; CHECK: CountTree:
+; CHECK: it eq
+; CHECK: cmpeq
+; CHECK: bne
+; CHECK: itt eq
+; CHECK: moveq
+; CHECK: popeq
+ br label %tailrecurse
+
+tailrecurse: ; preds = %bb, %entry
+ %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
+ %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
+ %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
+ %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1]
+ %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1]
+ %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1]
+ br i1 %bothcond2, label %return, label %bb
+
+bb: ; preds = %tailrecurse
+ %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0]
+ br label %tailrecurse
+
+return: ; preds = %tailrecurse
+ ret i32 0
+}
+
+ %struct.SString = type { i8*, i32, i32 }
+
+declare void @abort()
+
+define fastcc void @t1(%struct.SString* %word, i8 signext %c) {
+entry:
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: popne {r7, pc}
+ %tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ tail call void @abort( )
+ unreachable
+
+cond_false: ; preds = %entry
+ ret void
+}
+
+define fastcc void @t2() nounwind {
+entry:
+; CHECK: t2:
+; CHECK: cmp r0, #0
+; CHECK: beq
+ br i1 undef, label %bb.i.i3, label %growMapping.exit
+
+bb.i.i3: ; preds = %entry
+ unreachable
+
+growMapping.exit: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
new file mode 100644
index 0000000..1d45d3c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; There shouldn't be an unconditional branch at the end of bb52.
+; rdar://7184787
+
+@posed = external global i64 ; <i64*> [#uses=1]
+
+define i1 @ab_bb52(i64 %.reload78, i64* %.out, i64* %.out1) nounwind {
+newFuncRoot:
+ br label %bb52
+
+bb52.bb55_crit_edge.exitStub: ; preds = %bb52
+ store i64 %0, i64* %.out
+ store i64 %2, i64* %.out1
+ ret i1 true
+
+bb52.bb53_crit_edge.exitStub: ; preds = %bb52
+ store i64 %0, i64* %.out
+ store i64 %2, i64* %.out1
+ ret i1 false
+
+bb52: ; preds = %newFuncRoot
+; CHECK: movne
+; CHECK: moveq
+; CHECK: pop
+; CHECK-NEXT: LBB1_2:
+ %0 = load i64* @posed, align 4 ; <i64> [#uses=3]
+ %1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
+ %2 = ashr i64 %1, 1 ; <i64> [#uses=3]
+ %3 = icmp eq i64 %2, 0 ; <i1> [#uses=1]
+ br i1 %3, label %bb52.bb55_crit_edge.exitStub, label %bb52.bb53_crit_edge.exitStub
+}
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
new file mode 100644
index 0000000..7d093ec
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -0,0 +1,120 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep tbb
+
+; Do not use tbb / tbh if any destination is before the jumptable.
+; rdar://7102917
+
+define i16 @main__getopt_internal_2E_exit_2E_ce(i32) nounwind {
+newFuncRoot:
+ br label %_getopt_internal.exit.ce
+
+codeRepl127.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 0
+
+parse_options.exit.loopexit.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 1
+
+bb1.i.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 2
+
+bb90.i.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 3
+
+codeRepl104.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 4
+
+codeRepl113.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 5
+
+codeRepl51.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 6
+
+codeRepl70.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 7
+
+codeRepl119.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 8
+
+codeRepl93.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 9
+
+codeRepl101.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 10
+
+codeRepl120.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 11
+
+codeRepl89.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 12
+
+codeRepl45.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 13
+
+codeRepl58.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 14
+
+codeRepl46.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 15
+
+codeRepl50.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 16
+
+codeRepl52.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 17
+
+codeRepl53.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 18
+
+codeRepl61.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 19
+
+codeRepl85.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 20
+
+codeRepl97.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 21
+
+codeRepl79.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 22
+
+codeRepl102.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 23
+
+codeRepl54.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 24
+
+codeRepl57.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 25
+
+codeRepl103.exitStub: ; preds = %_getopt_internal.exit.ce
+ ret i16 26
+
+_getopt_internal.exit.ce: ; preds = %newFuncRoot
+ switch i32 %0, label %codeRepl127.exitStub [
+ i32 -1, label %parse_options.exit.loopexit.exitStub
+ i32 0, label %bb1.i.exitStub
+ i32 63, label %bb90.i.exitStub
+ i32 66, label %codeRepl104.exitStub
+ i32 67, label %codeRepl113.exitStub
+ i32 71, label %codeRepl51.exitStub
+ i32 77, label %codeRepl70.exitStub
+ i32 78, label %codeRepl119.exitStub
+ i32 80, label %codeRepl93.exitStub
+ i32 81, label %codeRepl101.exitStub
+ i32 82, label %codeRepl120.exitStub
+ i32 88, label %codeRepl89.exitStub
+ i32 97, label %codeRepl45.exitStub
+ i32 98, label %codeRepl58.exitStub
+ i32 99, label %codeRepl46.exitStub
+ i32 100, label %codeRepl50.exitStub
+ i32 104, label %codeRepl52.exitStub
+ i32 108, label %codeRepl53.exitStub
+ i32 109, label %codeRepl61.exitStub
+ i32 110, label %codeRepl85.exitStub
+ i32 111, label %codeRepl97.exitStub
+ i32 113, label %codeRepl79.exitStub
+ i32 114, label %codeRepl102.exitStub
+ i32 115, label %codeRepl54.exitStub
+ i32 116, label %codeRepl57.exitStub
+ i32 118, label %codeRepl103.exitStub
+ ]
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
new file mode 100644
index 0000000..da2874d
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: push {r7, lr}
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: push {r7, lr}
+; CHECK: ldmia
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: push {r7, lr}
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index 19c7584..94888fd 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldr r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldr | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32* %v) {
entry:
+; CHECK: f1:
+; CHECK: ldr r0, [r0]
%tmp = load i32* %v
ret i32 %tmp
}
define i32 @f2(i32* %v) {
entry:
+; CHECK: f2:
+; CHECK: ldr.w r0, [r0, #+4092]
%tmp2 = getelementptr i32* %v, i32 1023
%tmp = load i32* %tmp2
ret i32 %tmp
@@ -19,6 +19,9 @@ entry:
define i32 @f3(i32* %v) {
entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldr r0, [r0, r1]
%tmp2 = getelementptr i32* %v, i32 1024
%tmp = load i32* %tmp2
ret i32 %tmp
@@ -26,6 +29,8 @@ entry:
define i32 @f4(i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: ldr r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
%tmp3 = load i32* %tmp2
@@ -34,6 +39,8 @@ entry:
define i32 @f5(i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: ldr r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
%tmp3 = load i32* %tmp2
@@ -42,6 +49,8 @@ entry:
define i32 @f6(i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: ldr.w r0, [r0, r1, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
@@ -51,6 +60,10 @@ entry:
define i32 @f7(i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldr r0, [r0, r1]
+
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index d48ecef..9e6aef4 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrh | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
define i32 @test1(i8* %v.pntr.s0.u1) {
%tmp.u = load i8* %v.pntr.s0.u1
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 79ffa82..d1af4ba 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {ldr.*\\\[.*\],} | count 1
define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index f773e63..9cc3f4a 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {ldr.*\\!} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {ldrsb.*\\!} | count 1
define i32* @test1(i32* %X, i32* %dest) {
diff --git a/test/CodeGen/Thumb2/thumb2-ldrb.ll b/test/CodeGen/Thumb2/thumb2-ldrb.ll
index 5bacb8e..bf10097 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldrb r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrb | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldrb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i8 @f1(i8* %v) {
entry:
+; CHECK: f1:
+; CHECK: ldrb r0, [r0]
%tmp = load i8* %v
ret i8 %tmp
}
define i8 @f2(i8* %v) {
entry:
+; CHECK: f2:
+; CHECK: ldrb r0, [r0, #-1]
%tmp2 = getelementptr i8* %v, i8 1023
%tmp = load i8* %tmp2
ret i8 %tmp
@@ -19,6 +19,9 @@ entry:
define i8 @f3(i32 %base) {
entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldrb r0, [r0, r1]
%tmp1 = add i32 %base, 4096
%tmp2 = inttoptr i32 %tmp1 to i8*
%tmp3 = load i8* %tmp2
@@ -27,6 +30,8 @@ entry:
define i8 @f4(i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: ldrb r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i8*
%tmp3 = load i8* %tmp2
@@ -35,6 +40,8 @@ entry:
define i8 @f5(i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: ldrb r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i8*
%tmp3 = load i8* %tmp2
@@ -43,6 +50,8 @@ entry:
define i8 @f6(i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: ldrb.w r0, [r0, r1, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
@@ -52,6 +61,9 @@ entry:
define i8 @f7(i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldrb r0, [r0, r1]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
new file mode 100644
index 0000000..22d4e88
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+
+@b = external global i64*
+
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+; CHECK: ldrd r2, [r2]
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index 15f803e..f1fb79c 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldrh r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrh | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldrh
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i16 @f1(i16* %v) {
entry:
+; CHECK: f1:
+; CHECK: ldrh r0, [r0]
%tmp = load i16* %v
ret i16 %tmp
}
define i16 @f2(i16* %v) {
entry:
+; CHECK: f2:
+; CHECK: ldrh.w r0, [r0, #+2046]
%tmp2 = getelementptr i16* %v, i16 1023
%tmp = load i16* %tmp2
ret i16 %tmp
@@ -19,6 +19,9 @@ entry:
define i16 @f3(i16* %v) {
entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldrh r0, [r0, r1]
%tmp2 = getelementptr i16* %v, i16 2048
%tmp = load i16* %tmp2
ret i16 %tmp
@@ -26,6 +29,8 @@ entry:
define i16 @f4(i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: ldrh r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i16*
%tmp3 = load i16* %tmp2
@@ -34,6 +39,8 @@ entry:
define i16 @f5(i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: ldrh r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i16*
%tmp3 = load i16* %tmp2
@@ -42,6 +49,8 @@ entry:
define i16 @f6(i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: ldrh.w r0, [r0, r1, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
@@ -51,6 +60,9 @@ entry:
define i16 @f7(i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldrh r0, [r0, r1]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
diff --git a/test/CodeGen/Thumb2/thumb2-lsl.ll b/test/CodeGen/Thumb2/thumb2-lsl.ll
index 666963a..6b0818a 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lsls r0, r0, #5
%tmp = shl i32 %a, 5
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsl2.ll b/test/CodeGen/Thumb2/thumb2-lsl2.ll
index eb7a279..f283eef 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lsls r0, r1
%tmp = shl i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr.ll b/test/CodeGen/Thumb2/thumb2-lsr.ll
index cf4d2f8..7cbee54 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lsrs r0, r0, #13
%tmp = lshr i32 %a, 13
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr2.ll b/test/CodeGen/Thumb2/thumb2-lsr2.ll
index 01fd56d..87800f9 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lsrs r0, r1
%tmp = lshr i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr3.ll b/test/CodeGen/Thumb2/thumb2-lsr3.ll
new file mode 100644
index 0000000..5cfd3f5
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i1 @test1(i64 %poscnt, i32 %work) {
+entry:
+; CHECK: rrx r0, r0
+; CHECK: lsrs.w r1, r1, #1
+ %0 = lshr i64 %poscnt, 1
+ %1 = icmp eq i64 %0, 0
+ ret i1 %1
+}
+
+define i1 @test2(i64 %poscnt, i32 %work) {
+entry:
+; CHECK: rrx r0, r0
+; CHECK: asrs.w r1, r1, #1
+ %0 = ashr i64 %poscnt, 1
+ %1 = icmp eq i64 %0, 0
+ ret i1 %1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
index 0772d7f..be66425 100644
--- a/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 6d1640f..782def9 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 0c4c596..e9fdec8 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -1,127 +1,147 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #11206827
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2868947712
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2880154539
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #251658240
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #3948544
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #258
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #4026531840
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; Test #<const>
; var 2.1 - 0x00ab00ab
define i32 @t2_const_var2_1_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_1_ok_1:
+;CHECK: #11206827
%ret = add i32 %lhs, 11206827 ; 0x00ab00ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 11206843 ; 0x00ab00bb
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_2:
+;CHECK: movt
%ret = add i32 %lhs, 27984043 ; 0x01ab00ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_3:
+;CHECK: movt
%ret = add i32 %lhs, 27984299 ; 0x01ab01ab
ret i32 %ret
}
define i32 @t2_const_var2_1_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_4:
+;CHECK: movt
%ret = add i32 %lhs, 28027649 ; 0x01abab01
ret i32 %ret
}
; var 2.2 - 0xab00ab00
define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_2_ok_1:
+;CHECK: #-1426019584
%ret = add i32 %lhs, 2868947712 ; 0xab00ab00
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 2868951552 ; 0xab00ba00
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_2:
+;CHECK: movt
%ret = add i32 %lhs, 2868947728 ; 0xab00ab10
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_3:
+;CHECK: movt
%ret = add i32 %lhs, 2869996304 ; 0xab10ab10
ret i32 %ret
}
define i32 @t2_const_var2_2_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_4:
+;CHECK: movt
%ret = add i32 %lhs, 279685904 ; 0x10abab10
ret i32 %ret
}
; var 2.3 - 0xabababab
define i32 @t2_const_var2_3_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_3_ok_1:
+;CHECK: #-1414812757
%ret = add i32 %lhs, 2880154539 ; 0xabababab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 2880154554 ; 0xabababba
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_2:
+;CHECK: movt
%ret = add i32 %lhs, 2880158379 ; 0xababbaab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_3:
+;CHECK: movt
%ret = add i32 %lhs, 2881137579 ; 0xabbaabab
ret i32 %ret
}
define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_4:
+;CHECK: movt
%ret = add i32 %lhs, 3131812779 ; 0xbaababab
ret i32 %ret
}
; var 3 - 0x0F000000
define i32 @t2_const_var3_1_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_1_ok_1:
+;CHECK: #251658240
%ret = add i32 %lhs, 251658240 ; 0x0F000000
ret i32 %ret
}
define i32 @t2_const_var3_2_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_2_ok_1:
+;CHECK: #3948544
%ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000
ret i32 %ret
}
define i32 @t2_const_var3_2_fail_1(i32 %lhs) {
+;CHECK: t2_const_var3_2_fail_1:
+;CHECK: movt
%ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000
ret i32 %ret
}
define i32 @t2_const_var3_3_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_3_ok_1:
+;CHECK: #258
%ret = add i32 %lhs, 258 ; 0b00000000000000000000000100000010
ret i32 %ret
}
define i32 @t2_const_var3_4_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_4_ok_1:
+;CHECK: #-268435456
%ret = add i32 %lhs, 4026531840 ; 0xF0000000
ret i32 %ret
}
-
diff --git a/test/CodeGen/Thumb2/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll
index d2f8c0b..a02f4f0 100644
--- a/test/CodeGen/Thumb2/thumb2-mov2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov2.ll
@@ -1,10 +1,11 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @t2MOVTi16_ok_1(i32 %a) {
+; CHECK: t2MOVTi16_ok_1:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = and i32 %a, 65535
%2 = shl i32 1234, 16
%3 = or i32 %1, %2
@@ -13,6 +14,11 @@ define i32 @t2MOVTi16_ok_1(i32 %a) {
}
define i32 @t2MOVTi16_test_1(i32 %a) {
+; CHECK: t2MOVTi16_test_1:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
%3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -24,6 +30,11 @@ define i32 @t2MOVTi16_test_1(i32 %a) {
}
define i32 @t2MOVTi16_test_2(i32 %a) {
+; CHECK: t2MOVTi16_test_2:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
%3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -36,6 +47,11 @@ define i32 @t2MOVTi16_test_2(i32 %a) {
}
define i32 @t2MOVTi16_test_3(i32 %a) {
+; CHECK: t2MOVTi16_test_3:
+; CHECK: movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
%3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -50,6 +66,11 @@ define i32 @t2MOVTi16_test_3(i32 %a) {
}
define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
+; CHECK: t2MOVTi16_test_nomatch_1:
+; CHECK: movw r1, #16384
+; CHECK-NEXT: movt r1, #154
+; CHECK: movw r1, #65535
+; CHECK-NEXT: movt r1, #154
%1 = shl i32 255, 8
%2 = shl i32 1234, 8
%3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
@@ -58,7 +79,6 @@ define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
%6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
%7 = lshr i32 %6, 3
%8 = or i32 %5, %7
-
ret i32 %8
}
diff --git a/test/CodeGen/Thumb2/thumb2-mov3.ll b/test/CodeGen/Thumb2/thumb2-mov3.ll
index 74418c1..46af6fb 100644
--- a/test/CodeGen/Thumb2/thumb2-mov3.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov3.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mov\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: movs r0, #171
%tmp = add i32 0, 171
ret i32 %tmp
}
; 1179666 = 0x00120012
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mov.w r0, #1179666
%tmp = add i32 0, 1179666
ret i32 %tmp
}
; 872428544 = 0x34003400
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mov.w r0, #872428544
%tmp = add i32 0, 872428544
ret i32 %tmp
}
; 1448498774 = 0x56565656
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: mov.w r0, #1448498774
%tmp = add i32 0, 1448498774
ret i32 %tmp
}
; 66846720 = 0x03fc0000
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mov.w r0, #66846720
%tmp = add i32 0, 66846720
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mov4.ll b/test/CodeGen/Thumb2/thumb2-mov4.ll
index 74c522f..06fa238 100644
--- a/test/CodeGen/Thumb2/thumb2-mov4.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
define i32 @f6(i32 %a) {
%tmp = add i32 0, 65535
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index e976e66..b1515b5 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mul\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK: f1:
+; CHECK: muls r0, r1
%tmp = mul i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll
new file mode 100644
index 0000000..5d47770
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep smmul | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep umull | count 1
+
+define i32 @smulhi(i32 %x, i32 %y) {
+ %tmp = sext i32 %x to i64 ; <i64> [#uses=1]
+ %tmp1 = sext i32 %y to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
+ %tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3.upgrd.1
+}
+
+define i32 @umulhi(i32 %x, i32 %y) {
+ %tmp = zext i32 %x to i64 ; <i64> [#uses=1]
+ %tmp1 = zext i32 %y to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
+ %tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3.upgrd.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn.ll b/test/CodeGen/Thumb2/thumb2-mvn.ll
index 95694d6..a8c8f83 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -1,27 +1,33 @@
-; RUN: llvm-as < %s | llc | grep {mvn\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
-
-target triple = "thumbv7-apple-darwin"
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: mvn r0, #187
%tmp = xor i32 4294967295, 187
ret i32 %tmp
}
; 0x00aa00aa = 11141290
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mvn r0, #11141290
%tmp = xor i32 4294967295, 11141290
ret i32 %tmp
}
; 0xcc00cc00 = 3422604288
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mvn r0, #-872363008
%tmp = xor i32 4294967295, 3422604288
ret i32 %tmp
}
; 0x00110000 = 1114112
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mvn r0, #1114112
%tmp = xor i32 4294967295, 1114112
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn2.ll b/test/CodeGen/Thumb2/thumb2-mvn2.ll
index df9b11b..375d0aa 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: mvns r0, r0
%tmp = xor i32 4294967295, %a
ret i32 %tmp
}
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mvns r0, r0
%tmp = xor i32 %a, 4294967295
ret i32 %tmp
}
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mvn.w r0, r0, lsl #5
%tmp = shl i32 %a, 5
%tmp1 = xor i32 %tmp, 4294967295
ret i32 %tmp1
}
define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: mvn.w r0, r0, lsr #6
%tmp = lshr i32 %a, 6
%tmp1 = xor i32 %tmp, 4294967295
ret i32 %tmp1
}
define i32 @f7(i32 %a) {
+; CHECK: f7:
+; CHECK: mvn.w r0, r0, asr #7
%tmp = ashr i32 %a, 7
%tmp1 = xor i32 %tmp, 4294967295
ret i32 %tmp1
}
define i32 @f8(i32 %a) {
+; CHECK: f8:
+; CHECK: mvn.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-neg.ll b/test/CodeGen/Thumb2/thumb2-neg.ll
index 8f938d5..6bf11ec 100644
--- a/test/CodeGen/Thumb2/thumb2-neg.ll
+++ b/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#0} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rsbs r0, r0, #0
%tmp = sub i32 0, %a
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll
index 92c4564..d4222c2 100644
--- a/test/CodeGen/Thumb2/thumb2-orn.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i32 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll
index 7758edd..7b01882 100644
--- a/test/CodeGen/Thumb2/thumb2-orn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} |\
+; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-orr.ll b/test/CodeGen/Thumb2/thumb2-orr.ll
index 9891658..89ab7b1 100644
--- a/test/CodeGen/Thumb2/thumb2-orr.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: orrs r0, r1
%tmp2 = or i32 %a, %b
ret i32 %tmp2
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: orr.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp2 = or i32 %a, %tmp
ret i32 %tmp2
}
define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: orr.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp2 = or i32 %a, %tmp
ret i32 %tmp2
}
define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: orr.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp2 = or i32 %a, %tmp
ret i32 %tmp2
}
define i32 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: orr.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll
index 6f2b62c..759a5b8 100644
--- a/test/CodeGen/Thumb2/thumb2-orr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1145324612\\|#1114112} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#-872363008\\|#1145324612\\|#1114112} | count 5
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
new file mode 100644
index 0000000..a982249
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep pkhbt | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep pkhtb | count 4
+
+define i32 @test1(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test1a(i32 %X, i32 %Y) {
+ %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test2(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp3, -65536 ; <i32> [#uses=1]
+ %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp57
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+ %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test4(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1]
+ %tmp46 = or i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp46
+}
+
+define i32 @test5(i32 %X, i32 %Y) {
+ %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1]
+ %tmp4 = lshr i32 %tmp2, 16 ; <i32> [#uses=2]
+ %tmp5 = or i32 %tmp4, %tmp17 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test5a(i32 %X, i32 %Y) {
+ %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp39 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp39, %tmp110 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test6(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1]
+ %tmp38 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp38, 65535 ; <i32> [#uses=1]
+ %tmp59 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp59
+}
+
+define i32 @test7(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp3, 65535 ; <i32> [#uses=1]
+ %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp57
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index 4009da3..27b1672 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,8 +1,23 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep {rev\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | FileCheck %s
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rev r0, r0
%tmp = tail call i32 @llvm.bswap.i32(i32 %a)
ret i32 %tmp
}
declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+define i32 @f2(i32 %X) {
+; CHECK: f2:
+; CHECK: revsh r0, r0
+ %tmp1 = lshr i32 %X, 8
+ %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+ %tmp3 = trunc i32 %X to i16
+ %tmp2 = and i16 %tmp1.upgrd.1, 255
+ %tmp4 = shl i16 %tmp3, 8
+ %tmp5 = or i16 %tmp2, %tmp4
+ %tmp5.upgrd.2 = sext i16 %tmp5 to i32
+ ret i32 %tmp5.upgrd.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev16.ll b/test/CodeGen/Thumb2/thumb2-rev16.ll
new file mode 100644
index 0000000..39b6ac3
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -0,0 +1,32 @@
+; XFAIL: *
+; FIXME: rev16 pattern is not matching
+
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+
+; 0xff00ff00 = 4278255360
+; 0x00ff00ff = 16711935
+define i32 @f1(i32 %a) {
+ %l8 = shl i32 %a, 8
+ %r8 = lshr i32 %a, 8
+ %mask_l8 = and i32 %l8, 4278255360
+ %mask_r8 = and i32 %r8, 16711935
+ %tmp = or i32 %mask_l8, %mask_r8
+ ret i32 %tmp
+}
+
+; 0xff000000 = 4278190080
+; 0x00ff0000 = 16711680
+; 0x0000ff00 = 65280
+; 0x000000ff = 255
+define i32 @f2(i32 %a) {
+ %l8 = shl i32 %a, 8
+ %r8 = lshr i32 %a, 8
+ %masklo_l8 = and i32 %l8, 65280
+ %maskhi_l8 = and i32 %l8, 4278190080
+ %masklo_r8 = and i32 %r8, 255
+ %maskhi_r8 = and i32 %r8, 16711680
+ %tmp1 = or i32 %masklo_l8, %masklo_r8
+ %tmp2 = or i32 %maskhi_l8, %maskhi_r8
+ %tmp = or i32 %tmp1, %tmp2
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 305ab99..01adb52 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {ror\\.w\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
define i32 @f1(i32 %a) {
%l8 = shl i32 %a, 10
diff --git a/test/CodeGen/Thumb2/thumb2-ror2.ll b/test/CodeGen/Thumb2/thumb2-ror2.ll
index dd19b0a..ffd1dd7 100644
--- a/test/CodeGen/Thumb2/thumb2-ror2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: rors r0, r1
%db = sub i32 32, %b
%l8 = shl i32 %a, %b
%r8 = lshr i32 %a, %db
diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll
index 5779687..4611e94 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i32 @f2(i32 %a, i32 %b) {
%tmp = shl i32 %b, 5
diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll
index 957d1d0..84a3796 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb2.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
new file mode 100644
index 0000000..ad96291
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+ %tmp = sub i64 %a, %b
+ ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select.ll b/test/CodeGen/Thumb2/thumb2-select.ll
new file mode 100644
index 0000000..2dcf8aa
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-select.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i32 @f1(i32 %a.s) {
+entry:
+; CHECK: f1:
+; CHECK: it eq
+; CHECK: moveq
+
+ %tmp = icmp eq i32 %a.s, 4
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f2(i32 %a.s) {
+entry:
+; CHECK: f2:
+; CHECK: it gt
+; CHECK: movgt
+ %tmp = icmp sgt i32 %a.s, 4
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f3(i32 %a.s, i32 %b.s) {
+entry:
+; CHECK: f3:
+; CHECK: it lt
+; CHECK: movlt
+ %tmp = icmp slt i32 %a.s, %b.s
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f4(i32 %a.s, i32 %b.s) {
+entry:
+; CHECK: f4:
+; CHECK: it le
+; CHECK: movle
+
+ %tmp = icmp sle i32 %a.s, %b.s
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f5(i32 %a.u, i32 %b.u) {
+entry:
+; CHECK: f5:
+; CHECK: it ls
+; CHECK: movls
+ %tmp = icmp ule i32 %a.u, %b.u
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f6(i32 %a.u, i32 %b.u) {
+entry:
+; CHECK: f6:
+; CHECK: it hi
+; CHECK: movhi
+ %tmp = icmp ugt i32 %a.u, %b.u
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f7:
+; CHECK: it hi
+; CHECK: lsrhi.w
+ %tmp1 = icmp ugt i32 %a, %b
+ %tmp2 = udiv i32 %c, 3
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
+ ret i32 %tmp3
+}
+
+define i32 @f8(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f8:
+; CHECK: it lo
+; CHECK: lsllo.w
+ %tmp1 = icmp ult i32 %a, %b
+ %tmp2 = mul i32 %c, 4
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
+ ret i32 %tmp3
+}
+
+define i32 @f9(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f9:
+; CHECK: it ge
+; CHECK: rorge.w
+ %tmp1 = icmp sge i32 %a, %b
+ %tmp2 = shl i32 %c, 10
+ %tmp3 = lshr i32 %c, 22
+ %tmp4 = or i32 %tmp2, %tmp3
+ %tmp5 = select i1 %tmp1, i32 %tmp4, i32 3
+ ret i32 %tmp5
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
new file mode 100644
index 0000000..b4274ad
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mvn | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep it | count 3
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
+ %tmp3 = add i32 %tmp2, %b
+ ret i32 %tmp3
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 2147483648
+ %tmp3 = add i32 %tmp2, %b
+ ret i32 %tmp3
+}
+
+define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 10
+ %tmp3 = sub i32 %b, %tmp2
+ ret i32 %tmp3
+}
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 9bd6e43..7746cd3 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep asr
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ror
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mov
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsl
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep asr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ror
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep mov
define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
%A = shl i32 %Y, 16
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
new file mode 100644
index 0000000..66cc884
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep smlabt | count 1
+
+define i32 @f3(i32 %a, i16 %x, i32 %y) {
+ %tmp = sext i16 %x to i32 ; <i32> [#uses=1]
+ %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
+ %tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
new file mode 100644
index 0000000..cdbf4ca
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep smulbt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: grep smultt | count 1
+
+@x = weak global i16 0 ; <i16*> [#uses=1]
+@y = weak global i16 0 ; <i16*> [#uses=0]
+
+define i32 @f1(i32 %y) {
+ %tmp = load i16* @x ; <i16> [#uses=1]
+ %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
+ %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+ %tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
new file mode 100644
index 0000000..0a7221c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
+entry:
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 6.300000e+01, float* undef, align 4
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 0.000000e+00, float* undef, align 4
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb193, %entry
+ %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+ %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+ %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
+ %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
+ %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
+ %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
+ %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
+ %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+ br i1 undef, label %bb193, label %bb186
+
+bb186: ; preds = %bb4
+ br label %bb193
+
+bb193: ; preds = %bb186, %bb4
+ %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+ %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb4
+}
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index 4097a6c..3eeec8c 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32* %v) {
+; CHECK: f1:
+; CHECK: str r0, [r1]
store i32 %a, i32* %v
ret i32 %a
}
define i32 @f2(i32 %a, i32* %v) {
+; CHECK: f2:
+; CHECK: str.w r0, [r1, #+4092]
%tmp2 = getelementptr i32* %v, i32 1023
store i32 %a, i32* %tmp2
ret i32 %a
}
define i32 @f2a(i32 %a, i32* %v) {
+; CHECK: f2a:
+; CHECK: str r0, [r1, #-128]
%tmp2 = getelementptr i32* %v, i32 -32
store i32 %a, i32* %tmp2
ret i32 %a
}
define i32 @f3(i32 %a, i32* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: str r0, [r1, r2]
%tmp2 = getelementptr i32* %v, i32 1024
store i32 %a, i32* %tmp2
ret i32 %a
@@ -30,6 +34,8 @@ define i32 @f3(i32 %a, i32* %v) {
define i32 @f4(i32 %a, i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: str r0, [r1, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
store i32 %a, i32* %tmp2
@@ -38,6 +44,8 @@ entry:
define i32 @f5(i32 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: str r0, [r1, r2]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
store i32 %a, i32* %tmp2
@@ -46,6 +54,8 @@ entry:
define i32 @f6(i32 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: str.w r0, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
@@ -55,6 +65,9 @@ entry:
define i32 @f7(i32 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: str r0, [r1, r2]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index 536011c..bee5810 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {str .*\\\[.*\],} | count 1
define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 1e93b70..6c804ee 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep {str.*\\!} | count 2
define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index d8401cd6..1ebb938 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i8 @f1(i8 %a, i8* %v) {
+; CHECK: f1:
+; CHECK: strb r0, [r1]
store i8 %a, i8* %v
ret i8 %a
}
define i8 @f2(i8 %a, i8* %v) {
+; CHECK: f2:
+; CHECK: strb.w r0, [r1, #+4092]
%tmp2 = getelementptr i8* %v, i32 4092
store i8 %a, i8* %tmp2
ret i8 %a
}
define i8 @f2a(i8 %a, i8* %v) {
+; CHECK: f2a:
+; CHECK: strb r0, [r1, #-128]
%tmp2 = getelementptr i8* %v, i32 -128
store i8 %a, i8* %tmp2
ret i8 %a
}
define i8 @f3(i8 %a, i8* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: strb r0, [r1, r2]
%tmp2 = getelementptr i8* %v, i32 4096
store i8 %a, i8* %tmp2
ret i8 %a
@@ -30,6 +34,8 @@ define i8 @f3(i8 %a, i8* %v) {
define i8 @f4(i8 %a, i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: strb r0, [r1, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i8*
store i8 %a, i8* %tmp2
@@ -38,6 +44,8 @@ entry:
define i8 @f5(i8 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: strb r0, [r1, r2]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i8*
store i8 %a, i8* %tmp2
@@ -46,6 +54,8 @@ entry:
define i8 @f6(i8 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: strb.w r0, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
@@ -55,6 +65,9 @@ entry:
define i8 @f7(i8 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: strb r0, [r1, r2]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index 80dedf0..b0eb8c1 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i16 @f1(i16 %a, i16* %v) {
+; CHECK: f1:
+; CHECK: strh r0, [r1]
store i16 %a, i16* %v
ret i16 %a
}
define i16 @f2(i16 %a, i16* %v) {
+; CHECK: f2:
+; CHECK: strh.w r0, [r1, #+4092]
%tmp2 = getelementptr i16* %v, i32 2046
store i16 %a, i16* %tmp2
ret i16 %a
}
define i16 @f2a(i16 %a, i16* %v) {
+; CHECK: f2a:
+; CHECK: strh r0, [r1, #-128]
%tmp2 = getelementptr i16* %v, i32 -64
store i16 %a, i16* %tmp2
ret i16 %a
}
define i16 @f3(i16 %a, i16* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: strh r0, [r1, r2]
%tmp2 = getelementptr i16* %v, i32 2048
store i16 %a, i16* %tmp2
ret i16 %a
@@ -30,6 +34,8 @@ define i16 @f3(i16 %a, i16* %v) {
define i16 @f4(i16 %a, i32 %base) {
entry:
+; CHECK: f4:
+; CHECK: strh r0, [r1, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i16*
store i16 %a, i16* %tmp2
@@ -38,6 +44,8 @@ entry:
define i16 @f5(i16 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f5:
+; CHECK: strh r0, [r1, r2]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i16*
store i16 %a, i16* %tmp2
@@ -46,6 +54,8 @@ entry:
define i16 @f6(i16 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f6:
+; CHECK: strh.w r0, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
@@ -55,6 +65,9 @@ entry:
define i16 @f7(i16 %a, i32 %base, i32 %offset) {
entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: strh r0, [r1, r2]
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
diff --git a/test/CodeGen/Thumb2/thumb2-sub.ll b/test/CodeGen/Thumb2/thumb2-sub.ll
index cf82704..95335a2 100644
--- a/test/CodeGen/Thumb2/thumb2-sub.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -1,31 +1,49 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\[w\]\\?\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: subs r0, #171
%tmp = sub i32 %a, 171
ret i32 %tmp
}
; 1179666 = 0x00120012
define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: sub.w r0, r0, #1179666
%tmp = sub i32 %a, 1179666
ret i32 %tmp
}
; 872428544 = 0x34003400
define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: sub.w r0, r0, #872428544
%tmp = sub i32 %a, 872428544
ret i32 %tmp
}
; 1448498774 = 0x56565656
define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: sub.w r0, r0, #1448498774
%tmp = sub i32 %a, 1448498774
ret i32 %tmp
}
; 510 = 0x000001fe
define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: sub.w r0, r0, #510
%tmp = sub i32 %a, 510
ret i32 %tmp
}
+
+; Don't change this to an add.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: subs r0, #1
+ %tmp = sub i32 %a, 1
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-sub2.ll b/test/CodeGen/Thumb2/thumb2-sub2.ll
index c7ebd22..6813f76 100644
--- a/test/CodeGen/Thumb2/thumb2-sub2.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
define i32 @f1(i32 %a) {
%tmp = sub i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-sub4.ll b/test/CodeGen/Thumb2/thumb2-sub4.ll
index fd283fd..a040d17 100644
--- a/test/CodeGen/Thumb2/thumb2-sub4.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r1
%tmp = sub i32 %a, %b
ret i32 %tmp
}
define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: sub.w r0, r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = sub i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: sub.w r0, r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = sub i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: sub.w r0, r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = sub i32 %a, %tmp
ret i32 %tmp1
}
define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: sub.w r0, r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll
index 3e9ec25..c3b56bc 100644
--- a/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {subs\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
%tmp = sub i64 %a, %b
ret i64 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 4afe354..33ed543 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep sxtab | count 1
define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
new file mode 100644
index 0000000..5dc3cc3
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+
+define void @bar(i32 %n.u) {
+entry:
+; CHECK: bar:
+; CHECK: tbb
+; CHECK: .align 1
+
+ switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+ tail call void(...)* @foo1()
+ ret void
+bb1:
+ tail call void(...)* @foo2()
+ ret void
+bb2:
+ tail call void(...)* @foo6()
+ ret void
+bb3:
+ tail call void(...)* @foo3()
+ ret void
+bb4:
+ tail call void(...)* @foo4()
+ ret void
+bb5:
+ tail call void(...)* @foo5()
+ ret void
+bb6:
+ tail call void(...)* @foo1()
+ ret void
+bb7:
+ tail call void(...)* @foo2()
+ ret void
+bb8:
+ tail call void(...)* @foo6()
+ ret void
+bb9:
+ tail call void(...)* @foo3()
+ ret void
+bb10:
+ tail call void(...)* @foo4()
+ ret void
+bb11:
+ tail call void(...)* @foo5()
+ ret void
+bb12:
+ tail call void(...)* @foo6()
+ ret void
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
new file mode 100644
index 0000000..c5cb6f3
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+
+; Thumb2 target should reorder the bb's in order to use tbb / tbh.
+
+; XFAIL: *
+
+ %struct.R_flstr = type { i32, i32, i8* }
+ %struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* }
+@_C_nextcmd = external global i32 ; <i32*> [#uses=3]
+@.str31 = external constant [28 x i8], align 1 ; <[28 x i8]*> [#uses=1]
+@_T_gtol = external global %struct._T_tstr* ; <%struct._T_tstr**> [#uses=2]
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+declare arm_apcscc void @Z_fatal(i8*) noreturn nounwind
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+; CHECK: main:
+; CHECK: tbh
+entry:
+ br label %bb42.i
+
+bb1.i2: ; preds = %bb42.i
+ br label %bb40.i
+
+bb5.i: ; preds = %bb42.i
+ %0 = or i32 %_Y_flags.1, 32 ; <i32> [#uses=1]
+ br label %bb40.i
+
+bb7.i: ; preds = %bb42.i
+ call arm_apcscc void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 0, i8* null) nounwind
+ unreachable
+
+bb15.i: ; preds = %bb42.i
+ call arm_apcscc void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 2, i8* null) nounwind
+ unreachable
+
+bb23.i: ; preds = %bb42.i
+ %1 = call arm_apcscc i32 @strlen(i8* null) nounwind readonly ; <i32> [#uses=0]
+ unreachable
+
+bb33.i: ; preds = %bb42.i
+ store i32 0, i32* @_C_nextcmd, align 4
+ %2 = call arm_apcscc noalias i8* @calloc(i32 21, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb34.i: ; preds = %bb42.i
+ %3 = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
+ %4 = add i32 %3, 1 ; <i32> [#uses=1]
+ store i32 %4, i32* @_C_nextcmd, align 4
+ %5 = call arm_apcscc noalias i8* @calloc(i32 22, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb35.i: ; preds = %bb42.i
+ %6 = call arm_apcscc noalias i8* @calloc(i32 20, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb37.i: ; preds = %bb42.i
+ %7 = call arm_apcscc noalias i8* @calloc(i32 14, i32 1) nounwind ; <i8*> [#uses=0]
+ unreachable
+
+bb39.i: ; preds = %bb42.i
+ call arm_apcscc void @Z_fatal(i8* getelementptr ([28 x i8]* @.str31, i32 0, i32 0)) nounwind
+ unreachable
+
+bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2
+ %_Y_flags.0 = phi i32 [ 0, %bb1.i2 ], [ %0, %bb5.i ], [ %_Y_flags.1, %bb42.i ] ; <i32> [#uses=1]
+ %_Y_eflag.b.0 = phi i1 [ %_Y_eflag.b.1, %bb1.i2 ], [ %_Y_eflag.b.1, %bb5.i ], [ true, %bb42.i ] ; <i1> [#uses=1]
+ br label %bb42.i
+
+bb42.i: ; preds = %bb40.i, %entry
+ %_Y_eflag.b.1 = phi i1 [ false, %entry ], [ %_Y_eflag.b.0, %bb40.i ] ; <i1> [#uses=2]
+ %_Y_flags.1 = phi i32 [ 0, %entry ], [ %_Y_flags.0, %bb40.i ] ; <i32> [#uses=2]
+ switch i32 undef, label %bb39.i [
+ i32 67, label %bb33.i
+ i32 70, label %bb35.i
+ i32 77, label %bb37.i
+ i32 83, label %bb34.i
+ i32 97, label %bb7.i
+ i32 100, label %bb5.i
+ i32 101, label %bb40.i
+ i32 102, label %bb23.i
+ i32 105, label %bb15.i
+ i32 116, label %bb1.i2
+ ]
+}
+
+declare arm_apcscc void @_T_addtol(%struct._T_tstr** nocapture, i32, i8*) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index c3c2094..634d318 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \
+; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10
; 0x000000bb = 187
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index fe2b2c8..c6867d9 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
define i1 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 9e2d3e5..525a817 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {tst\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \
+; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10
; 0x000000bb = 187
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index c0f404c..db202dd 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,34 +1,40 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp ne i32 %tmp, 0
ret i1 %tmp1
}
define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp eq i32 %tmp, 0
ret i1 %tmp1
}
define i1 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp ne i32 0, %tmp
ret i1 %tmp1
}
define i1 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: tst r0, r1
%tmp = and i32 %a, %b
%tmp1 = icmp eq i32 0, %tmp
ret i1 %tmp1
}
define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: tst.w r0, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -36,6 +42,8 @@ define i1 @f6(i32 %a, i32 %b) {
}
define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: tst.w r0, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -43,6 +51,8 @@ define i1 @f7(i32 %a, i32 %b) {
}
define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: tst.w r0, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -50,6 +60,8 @@ define i1 @f8(i32 %a, i32 %b) {
}
define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: tst.w r0, r0, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 0d1cc18..37919dd 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxth | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtab | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxth | count 1
define i8 @test1(i32 %A.u) zeroext {
%B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 28a5fe4..4022d95 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
; RUN: grep uxt | count 10
define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll
index 6abb6eb..1e55557 100644
--- a/test/CodeGen/Thumb2/tls1.ll
+++ b/test/CodeGen/Thumb2/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
; RUN: grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
; RUN: grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \
; RUN: -relocation-model=pic | grep {__tls_get_addr}
diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll
index 3396b0b..b8a0657 100644
--- a/test/CodeGen/Thumb2/tls2.ll
+++ b/test/CodeGen/Thumb2/tls2.ll
@@ -1,19 +1,29 @@
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \
-; RUN: -relocation-model=pic | grep {__tls_get_addr}
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | FileCheck %s -check-prefix=CHECK-NOT-PIC
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC
@i = external thread_local global i32 ; <i32*> [#uses=2]
define i32 @f() {
entry:
+; CHECK-NOT-PIC: f:
+; CHECK-NOT-PIC: add r0, pc
+; CHECK-NOT-PIC: ldr r1, [r0]
+; CHECK-NOT-PIC: i(gottpoff)
+
+; CHECK-PIC: f:
+; CHECK-PIC: bl __tls_get_addr(PLT)
%tmp1 = load i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32* @g() {
entry:
+; CHECK-NOT-PIC: g:
+; CHECK-NOT-PIC: add r0, pc
+; CHECK-NOT-PIC: ldr r1, [r0]
+; CHECK-NOT-PIC: i(gottpoff)
+
+; CHECK-PIC: g:
+; CHECK-PIC: bl __tls_get_addr(PLT)
ret i32* @i
}
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 2b4242a..2484860 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -3,7 +3,7 @@
; it makes a ton of annoying overlapping live ranges. This code should not
; cause spills!
;
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep spilled
+; RUN: llc < %s -march=x86 -stats |& not grep spilled
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
index a4d5589..5c40eea 100644
--- a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
+++ b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test() {
entry:
diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
index 4de3c79..8b0a185 100644
--- a/test/CodeGen/X86/2003-11-03-GlobalBool.ll
+++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: not grep {.byte\[\[:space:\]\]*true}
@X = global i1 true ; <i1*> [#uses=0]
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
index 56bb21c..f15a1b4 100644
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
@A = global [32 x i32] zeroinitializer
@B = global [32 x i32] zeroinitializer
diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
index f48b1d3..fea2b54 100644
--- a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {pushl %ebp} | count 1
+; RUN: llc < %s -march=x86 | grep {popl %ebp} | count 1
declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
index b25dfaf..f986ebd 100644
--- a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
+++ b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -i ESP | not grep sub
+; RUN: llc < %s -march=x86 | grep -i ESP | not grep sub
define i32 @test(i32 %X) {
ret i32 %X
diff --git a/test/CodeGen/X86/2004-02-22-Casts.ll b/test/CodeGen/X86/2004-02-22-Casts.ll
index 40d5f39..dabf7d3 100644
--- a/test/CodeGen/X86/2004-02-22-Casts.ll
+++ b/test/CodeGen/X86/2004-02-22-Casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @test1(double %X) {
%V = fcmp one double %X, 0.000000e+00 ; <i1> [#uses=1]
ret i1 %V
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll
index 5021fd8..b6631b6 100644
--- a/test/CodeGen/X86/2004-03-30-Select-Max.ll
+++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep {j\[lgbe\]}
+; RUN: llc < %s -march=x86 | not grep {j\[lgbe\]}
define i32 @max(i32 %A, i32 %B) {
%gt = icmp sgt i32 %A, %B ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
index 633a615..c62fee1 100644
--- a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
+++ b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
@@ -2,7 +2,7 @@
; overlapping live intervals. When two overlapping intervals have the same
; value, they can be joined though.
;
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=linearscan | \
+; RUN: llc < %s -march=x86 -regalloc=linearscan | \
; RUN: not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
define i64 @test(i64 %x) {
diff --git a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
index 858605c..f8ed016 100644
--- a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
+++ b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define double @test(double %d) {
%X = select i1 false, double %d, double %d ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
index 1a51bee..036aa6a 100644
--- a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
+++ b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @T(double %X) {
%V = fcmp oeq double %X, %X ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
index 9ee773c..db3af01 100644
--- a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
+++ b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @test(i1 %C, i1 %D, i32 %X, i32 %Y) {
%E = icmp slt i32 %X, %Y ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
index 37cff57..32fafc6 100644
--- a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
+++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -3,7 +3,7 @@
; is invalid code (there is no correct way to order the instruction). Check
; that we do not fold the load into the sub.
-; RUN: llvm-as < %s | llc -march=x86 | not grep sub.*GLOBAL
+; RUN: llc < %s -march=x86 | not grep sub.*GLOBAL
@GLOBAL = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
index 762047b..30a6ac6 100644
--- a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
+++ b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep 18446744073709551612
+; RUN: llc < %s -march=x86 | not grep 18446744073709551612
@A = external global i32 ; <i32*> [#uses=1]
@Y = global i32* getelementptr (i32* @A, i32 -1) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
index 04035ac..5266009 100644
--- a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
+++ b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=generic
+; RUN: llc < %s -march=x86 -mcpu=generic
; Make sure LLC doesn't crash in the stackifier due to FP PHI nodes.
define void @radfg_() {
diff --git a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index 817b281..d906da4 100644
--- a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep shld | count 1
;
; Check that the isel does not fold the shld, which already folds a load
diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
index 51d2fb2..dc69ef8 100644
--- a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 | not grep {subl.*%esp}
define i32 @f(i32 %a, i32 %b) {
%tmp.2 = mul i32 %a, %a ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
index c410c46..0421896 100644
--- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: grep asm-printer | grep 7
define i32 @g(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
index 743790c..c106f57 100644
--- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
+++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
; END.
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 4a0b5c3..8783a11 100644
--- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
; RUN: grep {movl _last} %t | count 1
; RUN: grep {cmpl.*_last} %t | count 1
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index f283666..49f3a95 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stats |& \
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
; RUN: not grep {Number of register spills}
; END.
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 72dab39..7d0a6ab 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -relocation-model=static -stats |& \
+; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \
; RUN: grep asm-printer | grep 14
;
@size20 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 48ed2b9..23954d7 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: grep asm-printer | grep 13
define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
diff --git a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
index 900abe5..8421483 100644
--- a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
+++ b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -1,7 +1,7 @@
; Coalescing from R32 to a subset R32_. Once another register coalescer bug is
; fixed, the movb should go away as well.
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep movl
@B = external global i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
index c39b377..d58d638 100644
--- a/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -relocation-model=static | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp}
@A = external global i16* ; <i16**> [#uses=1]
@B = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 6c0e76b..89b127c 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
; RUN: grep {asm-printer} | grep 31
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-05-17-VectorArg.ll b/test/CodeGen/X86/2006-05-17-VectorArg.ll
index 217cbe1..b36d61e 100644
--- a/test/CodeGen/X86/2006-05-17-VectorArg.ll
+++ b/test/CodeGen/X86/2006-05-17-VectorArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <4 x float> @opRSQ(<4 x float> %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index ae18c90..083d068 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep setnp
-; RUN: llvm-as < %s | llc -march=x86 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 | grep setnp
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math | \
; RUN: not grep setnp
define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index 78838d1..0288278 100644
--- a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test() {
br i1 false, label %cond_next33, label %cond_true12
diff --git a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
index 760fe36..4ea364d 100644
--- a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
+++ b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR825
define i64 @test() {
diff --git a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
index 1db3921..568fbbc 100644
--- a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
+++ b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR828
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-07-19-ATTAsm.ll b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
index 78167f6..c8fd10f 100644
--- a/test/CodeGen/X86/2006-07-19-ATTAsm.ll
+++ b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att
; PR834
; END.
diff --git a/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
index 08510a8..cac47cda 100644
--- a/test/CodeGen/X86/2006-07-20-InlineAsm.ll
+++ b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR833
@G = weak global i32 0 ; <i32*> [#uses=3]
diff --git a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
index a82612b..deae086 100644
--- a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
+++ b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -- 4294967240
+; RUN: llc < %s -march=x86 | grep -- 4294967240
; PR853
@X = global i32* inttoptr (i64 -56 to i32*) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index 2a521ad..3159cec 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,5 +1,5 @@
; PR850
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att > %t
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
; RUN: grep {movl 4(%eax),%ebp} %t
; RUN: grep {movl 0(%eax), %ebx} %t
diff --git a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
index 194cd66..aea707e 100644
--- a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
%struct.foo = type opaque
define fastcc i32 @test(%struct.foo* %v, %struct.foo* %vi) {
diff --git a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
index f2a8855..5fee326 100644
--- a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.expr = type { %struct.rtx_def*, i32, %struct.expr*, %struct.occr*, %struct.occr*, %struct.rtx_def* }
%struct.hash_table = type { %struct.expr**, i32, i32, i32 }
%struct.occr = type { %struct.occr*, %struct.rtx_def*, i8, i8 }
diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
index c1d81d5..a19d8f7 100644
--- a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
+++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | \
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
; RUN: not grep {movl %eax, %edx}
define i32 @foo(i32 %t, i32 %C) {
diff --git a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
index dd21c04..1e890bb 100644
--- a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8"
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
index cc988f2..795d464 100644
--- a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
+++ b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR933
define fastcc i1 @test() {
diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
index e8055f5..bf9fa57 100644
--- a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
+++ b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=sse | grep movaps
; Test that the load is NOT folded into the intrinsic, which would zero the top
; elts of the loaded vector.
diff --git a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
index d627d1b..fbb14ee 100644
--- a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define void @_ZN13QFSFileEngine4readEPcx() {
%tmp201 = load i32* null ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
index 5dc1cb3..b1f0451 100644
--- a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
+++ b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep shrl
+; RUN: llc < %s -march=x86 | grep shrl
; Bug in FindModifiedNodeSlot cause tmp14 load to become a zextload and shr 31
; is then optimized away.
@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
index 31eb070..3b987ac 100644
--- a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.function = type opaque
%struct.lang_decl = type opaque
%struct.location_t = type { i8*, i32 }
diff --git a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
index 2b53f26..6ed2e7b 100644
--- a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
@str = external global [18 x i8] ; <[18 x i8]*> [#uses=1]
define void @test() {
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index 1ff687a..88e8b4a 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -1,11 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86 -asm-verbose | %prcontext je 1 | \
-; RUN: grep BB1_1:
+; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
@str = internal constant [14 x i8] c"Hello world!\0A\00" ; <[14 x i8]*> [#uses=1]
@str.upgrd.1 = internal constant [13 x i8] c"Blah world!\0A\00" ; <[13 x i8]*> [#uses=1]
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @test(i32 %argc, i8** %argv) nounwind {
entry:
+; CHECK: cmpl $2
+; CHECK-NEXT: je
+; CHECK-NEXT: %entry
+
switch i32 %argc, label %UnifiedReturnBlock [
i32 1, label %bb
i32 2, label %bb2
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 1a92852..91210ea 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {subl \$4, %esp}
+; RUN: llc < %s -march=x86 | grep {subl \$4, %esp}
target triple = "i686-pc-linux-gnu"
@str = internal constant [9 x i8] c"%f+%f*i\0A\00" ; <[9 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index f0067c7..e839d72 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -1,9 +1,9 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep movb %t | count 2
; RUN: grep {movzb\[wl\]} %t
-define void @handle_vector_size_attribute() {
+define void @handle_vector_size_attribute() nounwind {
entry:
%tmp69 = load i32* null ; <i32> [#uses=1]
switch i32 %tmp69, label %bb84 [
diff --git a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
index 1222a37..ea2e6db 100644
--- a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
+++ b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep test.*1
+; RUN: llc < %s -march=x86 | grep test.*1
; PR1016
define i32 @test(i32 %A, i32 %B, i32 %C) {
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
index a58bedc..8c1573f 100644
--- a/test/CodeGen/X86/2006-11-28-Memcpy.ll
+++ b/test/CodeGen/X86/2006-11-28-Memcpy.ll
@@ -1,8 +1,6 @@
; PR1022, PR1023
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN: grep 3721182122 | count 2
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN: grep -E {movl _?bytes2} | count 1
+; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
+; RUN: llc < %s -march=x86 | grep -E {movl _?bytes2} | count 1
@fmt = constant [4 x i8] c"%x\0A\00" ; <[4 x i8]*> [#uses=2]
@bytes = constant [4 x i8] c"\AA\BB\CC\DD" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
index 17234b8..f81b303 100644
--- a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
+++ b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel
; PR1061
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 3b365f3..e1bae32 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,8 +1,7 @@
; PR1075
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | \
-; RUN: %prcontext {mulss LCPI1_3} 1 | grep mulss | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-define float @foo(float %x) {
+define float @foo(float %x) nounwind {
%tmp1 = fmul float %x, 3.000000e+00
%tmp3 = fmul float %x, 5.000000e+00
%tmp5 = fmul float %x, 7.000000e+00
@@ -11,4 +10,10 @@ define float @foo(float %x) {
%tmp12 = fadd float %tmp10, %tmp5
%tmp14 = fadd float %tmp12, %tmp7
ret float %tmp14
+
+; CHECK: mulss LCPI1_2(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: mulss LCPI1_3(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: ret
}
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index c03d982..5e7c0a7 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep leaq %t
; RUN: not grep {,%rsp)} %t
; PR1103
diff --git a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
index b1c86f4..e83e2e5 100644
--- a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
+++ b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; Test 'ri' constraint.
define void @run_init_process() {
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index 26d3e36..93e8808 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {orl \$1, %eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {leal 3(,%eax,8)}
+; RUN: llc < %s -march=x86 | grep {orl \$1, %eax}
+; RUN: llc < %s -march=x86 | grep {leal 3(,%eax,8)}
;; This example can't fold the or into an LEA.
define i32 @test(float ** %tmp2, i32 %tmp12) {
diff --git a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
index 365768a..954c95d 100644
--- a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
; PR1027
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
diff --git a/test/CodeGen/X86/2007-02-25-FastCCStack.ll b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
index 3b1eb1f..2e2b56d 100644
--- a/test/CodeGen/X86/2007-02-25-FastCCStack.ll
+++ b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium3
+; RUN: llc < %s -march=x86 -mcpu=pentium3
define internal fastcc double @ggc_rlimit_bound(double %limit) {
ret double %limit
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index 721b6e7..112d1ab 100644
--- a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin8 -mattr=+sse2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
define void @test() nounwind {
test.exit:
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index 4c69ec7..4cac9b4 100644
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-darwin | \
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
; RUN: grep push | count 3
define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) {
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index c98c89a..9580726 100644
--- a/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; ModuleID = 'a.bc'
diff --git a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
index 6965849..70936fb 100644
--- a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR1259
define void @test() {
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
index babcf6a..44d68dd 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test(i16 %tmp40414244) {
%tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 %tmp40414244 )
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
index 9bdb249..3312e01 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {mov %gs:72, %eax}
+; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index 6e1adf8..c1b1ad1 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah -march=x86 | \
+; RUN: llc < %s -mcpu=yonah -march=x86 | \
; RUN: grep {cmpltsd %xmm0, %xmm0}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
index e440cdb..30453d5 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {psrlw \$8, %xmm0}
+; RUN: llc < %s -march=x86 | grep {psrlw \$8, %xmm0}
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
index 7ce0584..9676f14 100644
--- a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
+++ b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
@data = external global [339 x i64]
diff --git a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
index 840fc7d..9f09e88 100644
--- a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1314
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.bc_struct = type { i32, i32, i32, i32, %struct.bc_struct*, i8*, i8* }
@_programStartTime = external global %struct.CycleCount ; <%struct.CycleCount*> [#uses=1]
-define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) {
+define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind {
entry:
%tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; <i64> [#uses=0]
%tmp221 = sdiv i32 10, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
index 514d665..f48c132 100644
--- a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
+++ b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
index f9671a4..4604f46 100644
--- a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
index 74e6e72..7528129 100644
--- a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
+++ b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep 4294967112
+; RUN: llc < %s -march=x86-64 | not grep 4294967112
; PR1348
%struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] }
diff --git a/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
index 3e08e50..e38992d 100644
--- a/test/CodeGen/X86/2007-04-24-VectorCrash.ll
+++ b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index ac85a9d7..113d0eb 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -o - -march=x86 -mattr=+mmx | grep paddq | count 2
-; RUN: llvm-as < %s | llc -o - -march=x86 -mattr=+mmx | grep movq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep paddq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep movq | count 2
define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) {
entry:
diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
index cbd6a73..85a2ecc 100644
--- a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
+++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep {bsrl.*10}
+; RUN: llc < %s | not grep {bsrl.*10}
; PR1356
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
index b0bcf5c..e58b193 100644
--- a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
+++ b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 -mattr=+sse
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse
; PR1371
@str = external global [18 x i8] ; <[18 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index ff7aac0..a3ff2f6 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
; PR1398
%struct.S = type { i32, i32 }
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
index 61f8b2c..8ef2538 100644
--- a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.XDesc = type <{ i32, %struct.OpaqueXDataStorageType** }>
%struct.OpaqueXDataStorageType = type opaque
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index d9836e4..2093b8f 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index 64ccef3..989dfc5 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpckhwd
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
diff --git a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
index 5d09075..321e116 100644
--- a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
+++ b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep GOTPCREL
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep ".align.*3"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep GOTPCREL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep ".align.*3"
%struct.A = type { [1024 x i8] }
@_ZN1A1aE = global %struct.A zeroinitializer, align 32 ; <%struct.A*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-06-04-tailmerge4.ll b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
index 0ad5396..baf2377 100644
--- a/test/CodeGen/X86/2007-06-04-tailmerge4.ll
+++ b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh -asm-verbose | grep invcont131
+; RUN: llc < %s -enable-eh -asm-verbose | grep invcont131
; PR 1496: tail merge was incorrectly removing this block
; ModuleID = 'report.1.bc'
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
index 3e7776a..36a97ef 100644
--- a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
+++ b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
@@ -1,5 +1,5 @@
; PR1495
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll
index 7756d06..2680b15 100644
--- a/test/CodeGen/X86/2007-06-14-branchfold.ll
+++ b/test/CodeGen/X86/2007-06-14-branchfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i686 | not grep jmp
+; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp
; check that branch folding understands FP_REG_KILL is not a branch
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index e608ac3..6128d8b 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep paddusw
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) {
diff --git a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
index af11f12..9d42c49 100644
--- a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
+++ b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
define void @test() {
%tmp1 = call <8 x i16> @llvm.x86.sse2.pmins.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) )
diff --git a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
index bcd265a..d2d6388 100644
--- a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define void @test() {
entry:
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index 66a58c7..dc11eec 100644
--- a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define void @test(<4 x float>* %arg) {
%tmp89 = getelementptr <4 x float>* %arg, i64 3
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 18850b1..2c513f1 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rsi, %mm0}
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rsi, %mm0}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index 7f09b52..d611677 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
; PR1545
@.str97 = external constant [56 x i8] ; <[56 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index c0bd282..8625b27 100644
--- a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse | grep {movq (%rdi), %rax}
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse | grep {movq 8(%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq (%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq 8(%rdi), %rax}
define i64 @foo_0(<2 x i64>* %val) {
entry:
%val12 = getelementptr <2 x i64>* %val, i32 0, i32 0 ; <i64*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
index 8eda0ab..3cd8052 100644
--- a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movl
+; RUN: llc < %s -march=x86 | not grep movl
define i8 @t(i8 zeroext %x, i8 zeroext %y) zeroext {
%tmp2 = add i8 %x, 2
diff --git a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index e9ea843..7768f36 100644
--- a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep "movb %ah, %r"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep "movb %ah, %r"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, [4 x i8], i64 }
%struct.PyBoolScalarObject = type { i64, %struct._typeobject*, i8 }
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
index b62d2c6..e93092f 100644
--- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movsbl}
+; RUN: llc < %s -march=x86 | grep {movsbl}
@X = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
index f6ed0fe..c90a85f 100644
--- a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
+++ b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep drectve
+; RUN: llc < %s -march=x86 | not grep drectve
; PR1607
%hlvm_programs_element = type { i8*, i32 (i32, i8**)* }
diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
index edcb823..d6ea510 100644
--- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
+++ b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
%struct..0anon = type { i32 }
%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index b6a5fc9..5acb051 100644
--- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
index 4f95b76..c5d2a46 100644
--- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep weak | count 2
+; RUN: llc < %s -march=x86 | grep weak | count 2
@__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
declare extern_weak i32 @pthread_once(i32*, void ()*)
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
index 6a313be..56ee2a3 100644
--- a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin -enable-eh | grep {isNullOrNil].eh"} | count 2
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -enable-eh | grep {isNullOrNil].eh"} | count 2
%struct.NSString = type { }
%struct._objc__method_prototype_list = type opaque
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
index 835e4ca..0ae1897 100644
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep 170
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
entry:
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index be51c04..4a56ee4 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep powixf2
-; RUN: llvm-as < %s | llc | grep fsqrt
+; RUN: llc < %s | grep powixf2
+; RUN: llc < %s | grep fsqrt
; ModuleID = 'yyy.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
index a733bb3..6fc8ec9 100644
--- a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
+++ b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep pushf
+; RUN: llc < %s -march=x86 | not grep pushf
%struct.gl_texture_image = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8* }
%struct.gl_texture_object = type { i32, i32, i32, float, [4 x i32], i32, i32, i32, i32, i32, float, [11 x %struct.gl_texture_image*], [1024 x i8], i32, i32, i32, i8, i8*, i8, void (%struct.gl_texture_object*, i32, float*, float*, float*, float*, i8*, i8*, i8*, i8*)*, %struct.gl_texture_object* }
diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
index e9fbe79..67323e8 100644
--- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
+++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | grep lea
%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index e2fdbb3..fc11347 100644
--- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movb
+; RUN: llc < %s -march=x86 | not grep movb
define i16 @f(i32* %bp, i32* %ss) signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index fd914a1..ea1bbc4 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep addss | not grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep addss | not grep esp
define fastcc void @fht(float* %fz, i16 signext %n) {
entry:
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index 3016a01..a3872ad 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sarl | not grep esp
+; RUN: llc < %s -march=x86 | grep sarl | not grep esp
define i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
index 6cac558..8a55935 100644
--- a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
%struct._Unwind_Context = type { }
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index 4ea4244..1e4ae84 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-linux-gnu
; PR1729
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
index a414ef0..fbcac50 100644
--- a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
define i64 @__ashldi3(i64 %u, i64 %b) {
entry:
diff --git a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
index 5332fa1..6d0cb47 100644
--- a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
+++ b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep movb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep movb | not grep x
; PR1734
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
index 2fcf76b..3f9845c 100644
--- a/test/CodeGen/X86/2007-10-16-fp80_select.ll
+++ b/test/CodeGen/X86/2007-10-16-fp80_select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
index f3cdfee..c0bb55e 100644
--- a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
+++ b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep addb | not grep x
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep addb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
; PR1734
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index e649999..600bd1f 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
define i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
index 450911a..984094d 100644
--- a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
+++ b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1748
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
index 9013e90..86d3bbf 100644
--- a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
+++ b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
define i16 @t() signext {
entry:
diff --git a/test/CodeGen/X86/2007-10-30-LSRCrash.ll b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
index 1c912a0..42db98b 100644
--- a/test/CodeGen/X86/2007-10-30-LSRCrash.ll
+++ b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @unique(i8* %full, i32 %p, i32 %len, i32 %mode, i32 %verbos, i32 %flags) {
entry:
diff --git a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
index f73a910..1b8e67d 100644
--- a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
+++ b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2
+; RUN: llc < %s -march=x86 -mattr=sse2
; ModuleID = 'yyy.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-11-01-ISelCrash.ll b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
index 704efd0..019c6a8 100644
--- a/test/CodeGen/X86/2007-11-01-ISelCrash.ll
+++ b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%"struct.K::JL" = type <{ i8 }>
%struct.jv = type { i64 }
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
index 4ae4d2f..4e11cda 100644
--- a/test/CodeGen/X86/2007-11-02-BadAsm.ll
+++ b/test/CodeGen/X86/2007-11-02-BadAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl | not grep rax
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl | not grep rax
%struct.color_sample = type { i64 }
%struct.gs_matrix = type { float, i64, float, i64, float, i64, float, i64, float, i64, float, i64 }
diff --git a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
index ffa6e44..27ec826 100644
--- a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
+++ b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1763
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
index 889b122..4045618 100644
--- a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR1766
%struct.dentry = type { %struct.dentry_operations* }
diff --git a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
index 7e41f36..6b871aa 100644
--- a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR1767
define void @xor_sse_2(i64 %bytes, i64* %p1, i64* %p2) {
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index de33c61..8e586a7 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static | grep {foo _str$}
+; RUN: llc < %s -relocation-model=static | grep {foo _str$}
; PR1761
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index a4e44e1..f6db0d0 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
define float @foo(i32* %x, float* %y, i32 %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/2007-11-07-MulBy4.ll b/test/CodeGen/X86/2007-11-07-MulBy4.ll
index d7fb684..d5b630b 100644
--- a/test/CodeGen/X86/2007-11-07-MulBy4.ll
+++ b/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep imul
+; RUN: llc < %s -march=x86 | not grep imul
%struct.eebb = type { %struct.eebb*, i16* }
%struct.hf = type { %struct.hf*, i16*, i8*, i32, i32, %struct.eebb*, i32, i32, i8*, i8*, i8*, i8*, i16*, i8*, i16*, %struct.ri, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [30 x i32], %struct.eebb, i32, i8* }
diff --git a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
index b5635b3..9c004f9 100644
--- a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
+++ b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb
%struct.double_int = type { i64, i64 }
%struct.tree_common = type <{ i8, [3 x i8] }>
@@ -6,7 +7,7 @@
%struct.tree_node = type { %struct.tree_int_cst }
@tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1]
-define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) {
+define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
entry:
%tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; <i32*> [#uses=1]
br i1 false, label %UnifiedReturnBlock, label %bb21
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 46422bc..0626d28 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
; RUN: grep {1 .*folded into instructions}
; Increment in loop bb.128.i adjusted to 2, to prevent loop reversal from
; kicking in.
diff --git a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
index 0d43a6e..debb461 100644
--- a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
+++ b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: grep {1 .*folded into instructions}
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 4
+; RUN: llc < %s -march=x86 | grep cmp | count 4
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
index cb7a3dc..ca995cc 100644
--- a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
+++ b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
+; RUN: llc < %s -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
%struct.__sbuf = type { i8*, i32 }
%struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
index 8ad7705..455de91 100644
--- a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
+++ b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu
; PR1799
%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 6309f3c..265d968 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
; PR1872
%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
index fddfd4f..7aec613 100644
--- a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
+++ b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 8a1520c..b040095 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep pushf
+; RUN: llc < %s -march=x86 | not grep pushf
%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
diff --git a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
index 962d6ec..6997d53 100644
--- a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
+++ b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o - | grep sinl
+; RUN: llc < %s -o - | grep sinl
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 38020c1..d795610 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -regalloc=local
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local
define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
entry:
diff --git a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
index 4feb078..e91f52e 100644
--- a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
+++ b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep IMPLICIT_DEF
+; RUN: llc < %s -march=x86 | not grep IMPLICIT_DEF
%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
index 4510edb..704b2ba 100644
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ b/test/CodeGen/X86/2008-01-16-Trampoline.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
index ffb82ae..b936686 100644
--- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep nop
+; RUN: llc < %s -march=x86 | grep nop
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
index 6885cf1..443a32d 100644
--- a/test/CodeGen/X86/2008-02-05-ISelCrash.ll
+++ b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR1975
@nodes = external global i64 ; <i64*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index 6db6537..d2d5149 100644
--- a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xor | grep CPI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xor | grep CPI
define void @casin({ double, double }* sret %agg.result, double %z.0, double %z.1) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
index 230af57..b772d77 100644
--- a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep andpd | not grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep andpd | not grep esp
declare double @llvm.sqrt.f64(double) nounwind readnone
diff --git a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
index 5bf8456..1983f1d 100644
--- a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
+++ b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and
+; RUN: llc < %s -march=x86 | grep and
define i32 @test(i1 %A) {
%B = zext i1 %A to i32 ; <i32> [#uses=1]
%C = sub i32 0, %B ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 47c8677..9b52c5c 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 557d00c..5115e48 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {a:} | not grep ax
-; RUN: llvm-as < %s | llc | grep {b:} | not grep ax
+; RUN: llc < %s | grep {a:} | not grep ax
+; RUN: llc < %s | grep {b:} | not grep ax
; PR2078
; The clobber list says that "ax" is clobbered. Make sure that eax isn't
; allocated to the input/output register.
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index 8cf36425..6b1eefe 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -regalloc=local -march=x86 -mattr=+mmx | grep esi
+; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi
; PR2082
; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
; registers.
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
index f78d526..8d6bb0d 100644
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
+; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3
; rdar://5761454
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
index ff7cf5e..1d31859 100644
--- a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mattr=+sse2
; PR2076
define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind {
diff --git a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
index 5d60bde..6615b8c 100644
--- a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.XX = type <{ i8 }>
%struct.YY = type { i64 }
diff --git a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
index 3ba31f4..0b4eb3a 100644
--- a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
+++ b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index fe0ee8a..ad7950c 100644
--- a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.CompAtom = type <{ %struct.Position, float, i32 }>
%struct.Lattice = type { %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-02-27-PEICrash.ll b/test/CodeGen/X86/2008-02-27-PEICrash.ll
index 055eabb..d842967 100644
--- a/test/CodeGen/X86/2008-02-27-PEICrash.ll
+++ b/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define i64 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
index 2d7182e..70a83b5 100644
--- a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
+++ b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=i386
; PR2122
define float @func(float %a, float %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-03-07-APIntBug.ll b/test/CodeGen/X86/2008-03-07-APIntBug.ll
index 5d1ccad..84e4827 100644
--- a/test/CodeGen/X86/2008-03-07-APIntBug.ll
+++ b/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | not grep 255
+; RUN: llc < %s -march=x86 -mcpu=i386 | not grep 255
%struct.CONSTRAINT = type { i32, i32, i32, i32 }
%struct.FIRST_UNION = type { %struct.anon }
diff --git a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
index 1098988..cd2d609 100644
--- a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
+++ b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
; PR2134
declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index 0f83b39..e673d31 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep TLSGD | count 2
+; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2
; PR2137
; ModuleID = '1.c'
diff --git a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
index 4a896e9..c6ba22e 100644
--- a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
+++ b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i16 @t(i32 %depth) signext nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index 544c9b5..8946415 100644
--- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
; PR2138
%struct.__locale_struct = type { [13 x %struct.locale_data*], i16*, i32*, i32*, [13 x i8*] }
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
index 4b6758d..ccc4d75 100644
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
%struct..0objc_object = type opaque
%struct.OhBoy = type { }
diff --git a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
index 2fad32a..eaa883c 100644
--- a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 6cf731b..4dc3a10 100644
--- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
index 53bb054..2d868e0 100644
--- a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
+++ b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define void @t() {
entry:
diff --git a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
index 83e1d60..305968a 100644
--- a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
+++ b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
; Don't fold re-materialized load into a two address instruction
%"struct.Smarts::Runnable" = type { i32 (...)**, i32 }
diff --git a/test/CodeGen/X86/2008-04-02-unnamedEH.ll b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
index fff75ff..a9f368b 100644
--- a/test/CodeGen/X86/2008-04-02-unnamedEH.ll
+++ b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc | grep unnamed_1_0.eh
-; ModuleID = '<stdin>'
+; RUN: llc < %s | grep unnamed_1.eh
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index f5de113..dc8c097 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
define i32 @t2() nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
index fea54c4..41fbdd1 100644
--- a/test/CodeGen/X86/2008-04-09-BranchFolding.ll
+++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep jmp
+; RUN: llc < %s -march=x86 | not grep jmp
%struct..0anon = type { i32 }
%struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* }
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 4bb8c6d..83eb61a 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
%struct.CGPoint = type { double, double }
%struct.NSArray = type { %struct.NSObject }
diff --git a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
index 30accad..3ccc0fe 100644
--- a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define void @Hubba(i8* %saveunder, i32 %firstBlob, i32 %select) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 3e0662a..6e8891b 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index c69ff33..ac48285 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep xorl | grep {%e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e}
; Make sure xorl operands are 32-bit registers.
%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
index 09fdc70..6389267 100644
--- a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
+++ b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep 120
+; RUN: llc < %s -march=x86 | not grep 120
; Don't accidentally add the offset twice for trailing bytes.
%struct.S63 = type { [63 x i8] }
diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
index 838c2ea..4eaca17 100644
--- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mattr=+sse41
+; RUN: llc < %s -mattr=+sse41
; rdar://5886601
; gcc testsuite: gcc.target/i386/sse4_1-pblendw.c
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
index 82721a5..38d6aa6 100644
--- a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
+++ b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {1 \$2 3}
+; RUN: llc < %s | grep {1 \$2 3}
; rdar://5720231
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index f93ad9a..5b97eb7 100644
--- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl > %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t
; RUN: not grep {r\[abcd\]x} %t
; RUN: not grep {r\[ds\]i} %t
; RUN: not grep {r\[bs\]p} %t
diff --git a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
index 6613faf..6e8e98d 100644
--- a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
+++ b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i64 @t(i64 %maxIdleDuration) nounwind {
call void asm sideeffect "wrmsr", "{cx},A,~{dirflag},~{fpsr},~{flags}"( i32 416, i64 0 ) nounwind
diff --git a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
index d7b5f25..a708224 100644
--- a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
+++ b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86 | grep jnp
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jnp
; rdar://5902801
declare void @test2()
diff --git a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
index c0b1961..cea0076 100644
--- a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
+++ b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct.V = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x i32>, float*, float*, float*, float*, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
index 9bcd1f3..5ceb546 100644
--- a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
+++ b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define fastcc void @glgVectorFloatConversion() nounwind {
%tmp12745 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 8751328..1f95a24 100644
--- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep abort | count 1
+; RUN: llc < %s | grep abort | count 1
; Calls to abort should all be merged
; ModuleID = '5898899.c'
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index 9ecd581..9cf50f4 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0 -fast-isel=false | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false | grep mov | count 5
; PR2343
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index c9e30d8..19a7354 100644
--- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movups | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2
define void @a(<4 x float>* %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
index 68f6cce..32bf8d4 100644
--- a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR2289
define void @_ada_ca11001() {
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 02db2ed..f1a19ec 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local
@_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
index d282761..236b7cd 100644
--- a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
+++ b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
%struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
%struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 0cde7cf..90af387 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movsd
-; RUN: llvm-as < %s | llc -march=x86 | grep movw
-; RUN: llvm-as < %s | llc -march=x86 | grep addw
+; RUN: llc < %s -march=x86 | not grep movsd
+; RUN: llc < %s -march=x86 | grep movw
+; RUN: llc < %s -march=x86 | grep addw
; These transforms are turned off for volatile loads and stores.
; Check that they weren't turned off for all loads and stores!
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 2b64212..500cd1f 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
index 75513b6..4d4819a 100644
--- a/test/CodeGen/X86/2008-06-16-SubregsBug.ll
+++ b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 4
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4
define i16 @test(i16* %tmp179) nounwind {
%tmp180 = load i16* %tmp179, align 2 ; <i16> [#uses=2]
diff --git a/test/CodeGen/X86/2008-06-18-BadShuffle.ll b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
index ba0a1f9..66f9065 100644
--- a/test/CodeGen/X86/2008-06-18-BadShuffle.ll
+++ b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
; Test to make sure we actually insert the bottom element of the vector
define <8 x i16> @a(<8 x i16> %a) nounwind {
diff --git a/test/CodeGen/X86/2008-06-25-VecISelBug.ll b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
index f369986..72d1907 100644
--- a/test/CodeGen/X86/2008-06-25-VecISelBug.ll
+++ b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep pslldq
define void @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
index 3586f877..46341fc 100644
--- a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
+++ b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
%struct.ogg_stream_state = type { i8*, i32, i32, i32, i32*, i64*, i32, i32, i32, i32, [282 x i8], i32, i32, i32, i32, i32, i64, i64 }
%struct.res_state = type { i32, i32, i32, i32, float*, float*, i32, i32 }
diff --git a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
index 5fb3e57..1a786ef 100644
--- a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
+++ b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep ax
+; RUN: llc < %s | grep ax
; PR2024
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2008-07-11-SHLBy1.ll b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
index 5b94a35..ff2b05f 100644
--- a/test/CodeGen/X86/2008-07-11-SHLBy1.ll
+++ b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -o - | not grep shr
+; RUN: llc < %s -march=x86-64 -o - | not grep shr
define i128 @sl(i128 %x) {
%t = shl i128 %x, 1
ret i128 %t
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
index 1d94638..f75e605 100644
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@@ -1,7 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static -disable-fp-elim |\
-; RUN: %prcontext 65534 1 | grep movl | count 1
+; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim | FileCheck %s
; PR2536
+
+; CHECK: movw %cx
+; CHECK-NEXT: andl $65534, %
+; CHECK-NEXT: movl %
+; CHECK-NEXT: movl $17
+
@g_5 = external global i16 ; <i16*> [#uses=2]
@g_107 = external global i16 ; <i16*> [#uses=1]
@g_229 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
index aa9ee50..f56604b 100644
--- a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
%struct.SV = type { i8*, i64, i64 }
@"\01LC25" = external constant [8 x i8] ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index ae30385..98919ee 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
; PR2539
external global <4 x float>, align 1 ; <<4 x float>*>:0 [#uses=2]
diff --git a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
index a18564f..0f67145 100644
--- a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
+++ b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2566
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-23-VSetCC.ll b/test/CodeGen/X86/2008-07-23-VSetCC.ll
index da6c089..684ca5c 100644
--- a/test/CodeGen/X86/2008-07-23-VSetCC.ll
+++ b/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -1,11 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium
+; RUN: llc < %s -march=x86 -mcpu=pentium
; PR2575
define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
br i1 false, label %bb.nph, label %._crit_edge
bb.nph: ; preds = %bb.nph, %0
- vicmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 > ; <<4 x i32>>:1 [#uses=1]
+ %X = icmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 > ; <<4 x i32>>:1 [#uses=1]
+ sext <4 x i1> %X to <4 x i32>
extractelement <4 x i32> %1, i32 3 ; <i32>:2 [#uses=1]
lshr i32 %2, 31 ; <i32>:3 [#uses=1]
trunc i32 %3 to i1 ; <i1>:4 [#uses=1]
@@ -27,4 +28,5 @@ bb.nph: ; preds = %bb.nph, %0
ret void
}
+
declare float @fmaxf(float, float)
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index 2ebbe6e..1d166f4 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 56
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 59
; PR2568
@g_3 = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
index 9371c2a..4428035 100644
--- a/test/CodeGen/X86/2008-08-06-RewriterBug.ll
+++ b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2596
@data = external global [400 x i64] ; <[400 x i64]*> [#uses=5]
diff --git a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
index b09211d..32f6ca0 100644
--- a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
+++ b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movzbl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl
define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
index 00bcdf8..8475e8d 100644
--- a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xadd
+; RUN: llc < %s -march=x86-64 | FileCheck %s
@var = external global i64 ; <i64*> [#uses=1]
define i32 @main() nounwind {
entry:
+; CHECK: main:
+; CHECK: lock
+; CHECK: decq
tail call i64 @llvm.atomic.load.sub.i64.p0i64( i64* @var, i64 1 ) ; <i64>:0 [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index 2c6828b..c76dd7d 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
%struct.QBasicAtomic = type { i32 }
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
index 4e35332..eacb4a5 100644
--- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
+++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
@@ -1,7 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movd | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movd | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq
; PR2677
+
%struct.Bigint = type { %struct.Bigint*, i32, i32, i32, i32, [1 x i32] }
define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index f793b52..101b3c5 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mcpu=core2 | grep pxor | count 2
-; RUN: llvm-as < %s | llc -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movapd
; PR2715
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index e22b647..b92c789 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,6 +1,6 @@
; Check that eh_return & unwind_init were properly lowered
-; RUN: llvm-as < %s | llc | grep %ebp | count 9
-; RUN: llvm-as < %s | llc | grep %ecx | count 5
+; RUN: llc < %s | grep %ebp | count 7
+; RUN: llc < %s | grep %ecx | count 5
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i386-pc-linux"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 7d01824..00ab735 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,6 +1,6 @@
; Check that eh_return & unwind_init were properly lowered
-; RUN: llvm-as < %s | llc | grep %rbp | count 7
-; RUN: llvm-as < %s | llc | grep %rcx | count 3
+; RUN: llc < %s | grep %rbp | count 5
+; RUN: llc < %s | grep %rcx | count 3
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index ffe10d4..60be0d5 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
; PR2687
define <2 x double> @a(<2 x i32> %x) nounwind {
diff --git a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
index 30a2b15..b3312d9 100644
--- a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
+++ b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; PR2757
@g_3 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
index 02dd04d..108f243 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2783
@g_15 = external global i16 ; <i16*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 9403344..534f990 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2748
@g_73 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index ed8d345..74429c3 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %eax, %eax"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %edx, %edx"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%eax), %eax"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%edx), %edx"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %eax, %eax"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %edx, %edx"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
; %0 must not be put in EAX or EDX.
; In the first asm, $0 and $2 must not be put in EAX.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 62e3233..f5bd307 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
+; RUN: llc < %s -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
+; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers
; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
; operand. There are many combinations that work; this is what llc puts out now.
diff --git a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
index 47feb83..a8f2912 100644
--- a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
+++ b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; PR2808
@g_3 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
index d103f14..c92a8f4 100644
--- a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
+++ b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movs | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fld | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movs | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fld | count 2
; check 'inreg' attribute for sse_regparm
define double @foo1() inreg nounwind {
diff --git a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
index b1f5ab5..f1ada28 100644
--- a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
+++ b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i32, i32, i32, [18 x i8] }
%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i32, i32, [3 x i32] }
diff --git a/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
index d4da01a..c36cf39 100644
--- a/test/CodeGen/X86/2008-09-29-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
%struct..0objc_selector = type opaque
%struct.NSString = type opaque
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 4f6eb59..935c4c5 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movz
+; RUN: llc < %s -march=x86 | not grep movz
; PR2835
@g_407 = internal global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
index e74280c..b48c4ad 100644
--- a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
+++ b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
;; This version includes 64-bit version of binary operators (in 32-bit mode).
;; Swap, cmp-and-swap not supported yet in this mode.
; ModuleID = 'Atomics.c'
diff --git a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
index bd1ad59..7f7b1a4 100644
--- a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
+++ b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2
; PR2850
@tmp_V2i = common global <2 x i32> zeroinitializer ; <<2 x i32>*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
index 837aad5..a135cd4 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
@@ -1,7 +1,7 @@
; ModuleID = 'nan.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
; This NaN should be shortened to a double (not a float).
declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
index d2e9b45..bd48105 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -1,7 +1,7 @@
; ModuleID = 'nan.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
; it is not safe to shorten any of these NaNs.
declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
diff --git a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
index 4808986..bc57612 100644
--- a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
+++ b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
define <4 x float> @f(float %w) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-10-11-CallCrash.ll b/test/CodeGen/X86/2008-10-11-CallCrash.ll
index 979b787..efc6125 100644
--- a/test/CodeGen/X86/2008-10-11-CallCrash.ll
+++ b/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2735
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
diff --git a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
index 608372e..4d3f8c2 100644
--- a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2775
define i32 @func_77(i8 zeroext %p_79) nounwind {
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
index 4318f1d..b8ca364 100644
--- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edx}
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
index e1dc7b6..de4c1e7 100644
--- a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
+++ b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2762
define void @foo(<4 x i32>* %p, <4 x double>* %q) {
%n = load <4 x i32>* %p
diff --git a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
index eb2ec37..b2e6061 100644
--- a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
+++ b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
define void @test(i64 %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
index 33e8c49..353d1c7 100644
--- a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
+++ b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
; from gcc.c-torture/compile/920520-1.c
diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
index d6ae05e..421b931 100644
--- a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
+++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
define void @f(float %wt) {
entry:
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index ad13b85..afeb358 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index d8b0e70..784bc72 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -2,8 +2,8 @@
; Until it does, we shouldn't use movaps to access the stack. On targets with
; sufficiently aligned stack (e.g. darwin) we should.
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
index 41776b2..7ad94f1 100644
--- a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
+++ b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2977
define i8* @ap_php_conv_p2(){
entry:
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
index 36a054a..507799b 100644
--- a/test/CodeGen/X86/2008-11-03-F80VAARG.ll
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o - | not grep 10
+; RUN: llc < %s -march=x86 -o - | not grep 10
declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index 7acc7ca..f8f317c 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep testb
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
index 7487548..1dc97fc 100644
--- a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
+++ b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu
; PR 1779
; Using 'A' constraint and a tied constraint together used to crash.
; ModuleID = '<stdin>'
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
index fe1870e..2e114ab 100644
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
-; ModuleID = '<stdin>'
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
index faf7cd4..7c811af 100644
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551
-; ModuleID = '<stdin>'
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
index 6c26b68..6dca141 100644
--- a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
+++ b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
index 81b25da..d96d806 100644
--- a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
+++ b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR3124
%struct.cpuinfo_x86 = type { i8, i8, i8, i8, i32, i8, i8, i8, i32, i32, [9 x i32], [16 x i8], [64 x i8], i32, i32, i32, i64, %struct.cpumask_t, i16, i16, i16, i16, i16, i16, i16, i16, i32 }
diff --git a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
index ca5a80c..1f8bd45 100644
--- a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
+++ b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep lea
; The inner loop should use [reg] addressing, not [reg+reg] addressing.
; rdar://6403965
diff --git a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
index 01e0f7e..4b72cb9 100644
--- a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
+++ b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3117
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
index 48bb4e4..fe5bff3 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
index ba7dfbb..4cb1b42 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
index 5fb639d..d5a676a 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep sub | grep -v subsections | count 1
+; RUN: llc < %s -march=x86 | grep add | count 2
+; RUN: llc < %s -march=x86 | grep sub | grep -v subsections | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; this should be rearranged to have two +s and one -
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
index b6b5cbd..7fd2e6f 100644
--- a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
+; RUN: llc < %s -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
%struct.XXActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
new file mode 100644
index 0000000..e97b63d
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh
+
+define void @_Z1fv() {
+entry:
+ br label %return
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
index 46b7018..6c70c5b 100644
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ b/test/CodeGen/X86/2008-12-16-BadShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep shrl
+; RUN: llc < %s | not grep shrl
; Note: this test is really trying to make sure that the shift
; returns the right result; shrl is most likely wrong,
; but if CodeGen starts legitimately using an shrl here,
diff --git a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
index 193d290..3080d08 100644
--- a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
+++ b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index c7fdfb2..13a9080 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,7 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 2 | grep mov
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
; PR3149
; Make sure the copy after inline asm is not coalesced away.
+; CHECK: ## InlineAsm End
+; CHECK-NEXT: BB1_2:
+; CHECK-NEXT: movl %esi, %eax
+
+
@"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00" ; <[7 x i8]*> [#uses=1]
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
index 24be521..75773e0 100644
--- a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
+++ b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; -(-a) - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
index e53a91e..2edcaea 100644
--- a/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
+; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
@X = external global [0 x i32]
diff --git a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
index 13cb9db..bae9283 100644
--- a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
+++ b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 4
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 4
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
index 7c800d4..27a7113 100644
--- a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
; PR3311
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
index ecf71f6..9c71469 100644
--- a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx -enable-legalize-types-checking
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
index ff20dc1..99bef6c 100644
--- a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
+++ b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; rdar://6501631
%CF = type { %Register }
diff --git a/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
index 340608a..2eab5f1 100644
--- a/test/CodeGen/X86/2009-01-16-UIntToFP.ll
+++ b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
index 8857df3..f895336 100644
--- a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
+++ b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6505632
; reduced from 483.xalancbmk
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index b12e413..0583ef1 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
; PR3402
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-01-26-WrongCheck.ll b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
index db9dbb6..117ff47 100644
--- a/test/CodeGen/X86/2009-01-26-WrongCheck.ll
+++ b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -enable-legalize-types-checking
; PR3393
define void @foo(i32 inreg %x) {
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
index b0c27d8..8684f4a 100644
--- a/test/CodeGen/X86/2009-01-27-NullStrings.ll
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -1,38 +1,7 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep {\\.cstring} | count 1
- %struct.A = type { }
- %struct.NSString = type opaque
- %struct.__builtin_CFString = type { i32*, i32, i8*, i32 }
- %struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
- %struct._objc_symtab = type { i32, %struct.objc_selector**, i16, i16 }
- %struct.objc_object = type opaque
- %struct.objc_selector = type opaque
-@"\01L_unnamed_cfstring_0" = internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([1 x i8]* @"\01LC", i32 0, i32 0), i32 0 }, section "__DATA, __cfstring" ; <%struct.__builtin_CFString*> [#uses=1]
-@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
-@"\01LC" = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_SELECTOR_REFERENCES_0" = internal global %struct.objc_selector* bitcast ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <%struct.objc_selector**> [#uses=2]
-@"\01L_OBJC_SYMBOLS" = internal global %struct._objc_symtab zeroinitializer, section "__OBJC,__symbols,regular,no_dead_strip", align 4 ; <%struct._objc_symtab*> [#uses=2]
-@"\01L_OBJC_METH_VAR_NAME_0" = internal global [6 x i8] c"bork:\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[6 x i8]*> [#uses=2]
-@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1]
-@"\01L_OBJC_CLASS_NAME_0" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1 ; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), %struct._objc_symtab* @"\01L_OBJC_SYMBOLS" }, section "__OBJC,__module_info,regular,no_dead_strip", align 4 ; <%struct._objc_module*> [#uses=1]
-@llvm.used = appending global [6 x i8*] [ i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0" to i8*), i8* bitcast (%struct._objc_symtab* @"\01L_OBJC_SYMBOLS" to i8*), i8* getelementptr ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*) ], section "llvm.metadata" ; <[6 x i8*]*> [#uses=0]
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
+; CHECK: .section __TEXT,__cstring,cstring_literals
-define void @func(%struct.A* %a) nounwind {
-entry:
- %a_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
- %a.0 = alloca %struct.objc_object* ; <%struct.objc_object**> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store %struct.A* %a, %struct.A** %a_addr
- %0 = load %struct.A** %a_addr, align 4 ; <%struct.A*> [#uses=1]
- %1 = bitcast %struct.A* %0 to %struct.objc_object* ; <%struct.objc_object*> [#uses=1]
- store %struct.objc_object* %1, %struct.objc_object** %a.0, align 4
- %2 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4 ; <%struct.objc_selector*> [#uses=1]
- %3 = load %struct.objc_object** %a.0, align 4 ; <%struct.objc_object*> [#uses=1]
- call void bitcast (%struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* @objc_msgSend to void (%struct.objc_object*, %struct.objc_selector*, %struct.NSString*)*)(%struct.objc_object* %3, %struct.objc_selector* %2, %struct.NSString* bitcast (%struct.__builtin_CFString* @"\01L_unnamed_cfstring_0" to %struct.NSString*)) nounwind
- br label %return
+@x = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-return: ; preds = %entry
- ret void
-}
+@y = global [1 x i8]* @x
-declare %struct.objc_object* @objc_msgSend(%struct.objc_object*, %struct.objc_selector*, ...)
diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
index b7f37c9..ce3ea82 100644
--- a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
; rdar://6538384
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/2009-01-31-BigShift.ll b/test/CodeGen/X86/2009-01-31-BigShift.ll
index 360b4f0..4eb0ec1 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
; PR3401
define void @x(i288 %i) nounwind {
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 2b5b189..9d24084 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {mov.*56}
+; RUN: llc < %s -march=x86 | grep {mov.*56}
; PR3449
define void @test(<8 x double>* %P, i64* %Q) nounwind {
diff --git a/test/CodeGen/X86/2009-01-31-BigShift3.ll b/test/CodeGen/X86/2009-01-31-BigShift3.ll
index c92c86a..1b531e3 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift3.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3450
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-01-LargeMask.ll b/test/CodeGen/X86/2009-02-01-LargeMask.ll
index f2a964f..c4042e6 100644
--- a/test/CodeGen/X86/2009-02-01-LargeMask.ll
+++ b/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3453
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
index 5f97ee7..e75af13 100644
--- a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
+++ b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3411
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
index 1f29bdb..6ba046a 100644
--- a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
+++ b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep { - 92}
+; RUN: llc < %s | grep { - 92}
; PR3481
; The offset should print as -92, not +17179869092
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
index 39cad73..0ffa8fd 100644
--- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
newFuncRoot:
diff --git a/test/CodeGen/X86/2009-02-07-CoalescerBug.ll b/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
index 784c97a..2d0bbe6 100644
--- a/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -stats |& grep {Number of valno def marked dead} | grep 1
+; RUN: llc < %s -march=x86 -relocation-model=pic -stats |& grep {Number of valno def marked dead} | grep 1
; rdar://6566708
target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
index cd30c1e..908cc08 100644
--- a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3486
define i32 @foo(i8 signext %p_26) nounwind {
diff --git a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
index 7b73a86..1284b0d 100644
--- a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
+++ b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR3537
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index b0c4449..72c7ee9 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s
+; RUN: llc < %s -march=x86-64
; PR3538
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
index ddd15f7..2e148ad 100644
--- a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
+++ b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {\$-81920} | count 3
-; RUN: llvm-as < %s | llc -march=x86 | grep {\$4294885376} | count 1
+; RUN: llc < %s -march=x86 | grep {\$-81920} | count 3
+; RUN: llc < %s -march=x86 | grep {\$4294885376} | count 1
; ModuleID = 'shant.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
index 1d10319..4f8a5e7 100644
--- a/test/CodeGen/X86/2009-02-12-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-apple-darwin8
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8
; PR3561
define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind {
diff --git a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
index 54fcd43..58a7f9f 100644
--- a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
+++ b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
define i32 @main() nounwind {
bb4.i.thread:
diff --git a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
index a6bb7b8..b3dd13c 100644
--- a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
+++ b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep weak | count 3
+; RUN: llc < %s | grep weak | count 3
; PR3629
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 3dbfa80..7ea6998 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted
; rdar://6608609
define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 8bf6c23..cb1b1ef 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
; rdar://6627786
target triple = "x86_64-apple-darwin10.0"
diff --git a/test/CodeGen/X86/2009-03-03-BTHang.ll b/test/CodeGen/X86/2009-03-03-BTHang.ll
index 0f338d8..bb95925 100644
--- a/test/CodeGen/X86/2009-03-03-BTHang.ll
+++ b/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; rdar://6642541
%struct.HandleBlock = type { [30 x i32], [990 x i8*], %struct.HandleBlockTrailer }
diff --git a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
index 6f16ced..9deeceb 100644
--- a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
+++ b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3686
; rdar://6661799
diff --git a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
index ccedaae..411a0c9 100644
--- a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
+++ b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
index 28302c0..39caddc 100644
--- a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
+++ b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
; This should do a single load into the fp stack for the return, not diddle with xmm registers.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
index d7b5269..896c968 100644
--- a/test/CodeGen/X86/2009-03-09-APIntCrash.ll
+++ b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR3763
%struct.__block_descriptor = type { i64, i64 }
diff --git a/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
index 2ccd771..4224210 100644
--- a/test/CodeGen/X86/2009-03-09-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
; PR3706
define void @__mulxc3(x86_fp80 %b) nounwind {
diff --git a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
index 3d979e9..90dff88 100644
--- a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
; rdar://r6661945
%struct.WINDOW = type { i16, i16, i16, i16, i16, i16, i16, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.ldat*, i16, i16, i32, i32, %struct.WINDOW*, %struct.pdat, i16, %struct.cchar_t }
diff --git a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
index 1f56317..d5ba93e 100644
--- a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
@lookupTable5B = external global [64 x i32], align 32 ; <[64 x i32]*> [#uses=1]
@lookupTable3B = external global [16 x i32], align 32 ; <[16 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index ec060e4..3564f01 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
; rdar://6668548
declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index b01556d..878fa51 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -A 2 {call.*f} | grep movl
+; RUN: llc < %s -march=x86 | grep -A 2 {call.*f} | grep movl
; Check the register copy comes after the call to f and before the call to g
; PR3784
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index 091aab4..adbd241 100644
--- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel
+; RUN: llc < %s -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel
; Check that register copies in the landing pad come after the EH_LABEL
declare i32 @f()
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
index 09782a2..80e7639 100644
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
+; RUN: llc < %s -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
; rdar://6682365
; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
diff --git a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
index b5298ae..06dfdc0 100644
--- a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -O0
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0
define fastcc void @optimize_bit_field() nounwind {
bb4:
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index b30d41e..b5873ba 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
; RUN: not grep spill %t
; RUN: not grep {%rsp} %t
; RUN: not grep {%rbp} %t
diff --git a/test/CodeGen/X86/2009-03-23-i80-fp80.ll b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
index 0619e12..e542325 100644
--- a/test/CodeGen/X86/2009-03-23-i80-fp80.ll
+++ b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep 302245289961712575840256
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep K40018000000000000000
+; RUN: opt < %s -instcombine -S | grep 302245289961712575840256
+; RUN: opt < %s -instcombine -S | grep K40018000000000000000
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index 2c330db..f40fddc 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t -f
+; RUN: llc < %s -march=x86 -o %t
; RUN: not grep and %t
; RUN: not grep shr %t
; rdar://6661955
diff --git a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
index 0e31942..f486479 100644
--- a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
+++ b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -no-implicit-float
+; RUN: llc < %s -march=x86 -mattr=+sse2
-define double @t(double %x) nounwind ssp {
+define double @t(double %x) nounwind ssp noimplicitfloat {
entry:
br i1 false, label %return, label %bb3
diff --git a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
index 1d4d2b6..97bbd93 100644
--- a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6774324
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
diff --git a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index bf1c8df..27f11cf 100644
--- a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llc < %s -fast-isel
; radr://6772169
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10"
diff --git a/test/CodeGen/X86/2009-04-12-picrel.ll b/test/CodeGen/X86/2009-04-12-picrel.ll
index 73062ab..f194280 100644
--- a/test/CodeGen/X86/2009-04-12-picrel.ll
+++ b/test/CodeGen/X86/2009-04-12-picrel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
; RUN: grep leaq %t | count 1
@dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
index d6f4b94..ff8cf0a 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
; rdar://6781755
; PR3934
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
index 7f94c6c..4362ba4 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; rdar://6781755
; PR3934
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 0d66f69..bfa3eaa 100644
--- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
; rdar://6787136
%struct.X = type { i8, [32 x i8] }
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index 3e60f6b..f46eed4 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
; XFAIL: *
; 69408 removed the opportunity for this optimization to work
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
index 985eb21..4d25b0f 100644
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
; rdar://6802189
; Test if linearscan is unfavoring registers for allocation to allow more reuse
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 750dba7..c6e6e50 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,7 +1,13 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 | \
-; RUN: %prcontext {14} 2 | grep {(%ebp)} | count 1
+; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic \
+; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 < %s | \
+; RUN: FileCheck %s
; rdar://6808032
+; CHECK: pextrw $14
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: (%ebp)
+; CHECK-NEXT: pinsrw
+
define void @update(i8** %args_list) nounwind {
entry:
%cmp.i = icmp eq i32 0, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 2835c2d..c1ec45f 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
; RUN: grep {leal.*TLSGD.*___tls_get_addr} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
; PR4004
diff --git a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
index 981d327..94d3eb2 100644
--- a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
; rdar://6806252
define i64 @test(i32* %tmp13) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
index b804a5b..7981a52 100644
--- a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
; PR4034
%struct.BiContextType = type { i16, i8 }
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
index 1b757b1..d77e528 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
; PR4056
define void @int163(i32 %p_4, i32 %p_5) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
index 70cb4ff..f025654 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
; PR4051
define void @int163(i32 %p_4, i32 %p_5) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
index 0fb000c..0a2fcdb 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
; PR4076
type { i8, i8, i8 } ; type %0
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index fc31c0b..a2fd2e4 100644
--- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl.*%ebx, 8(%esi)}
+; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.0"
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index 767eb31..6843723 100644
--- a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10
+; RUN: llc < %s -mtriple=i386-apple-darwin10
; rdar://6837009
type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 } ; type %0
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index c02c045..d1f9cf8 100644
--- a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
; PR4099
type { [62 x %struct.Bitvec*] } ; type %0
diff --git a/test/CodeGen/X86/2009-04-scale.ll b/test/CodeGen/X86/2009-04-scale.ll
index 0766dc7..e4c756c 100644
--- a/test/CodeGen/X86/2009-04-scale.ll
+++ b/test/CodeGen/X86/2009-04-scale.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-unknown-linux-gnu
+; RUN: llc < %s -march=x86 -mtriple=i386-unknown-linux-gnu
; PR3995
%struct.vtable = type { i32 (...)** }
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
index 284c6e2..738b5fb 100644
--- a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static > %t
+; RUN: llc < %s -relocation-model=static > %t
; RUN: grep "1: ._pv_cpu_ops+8" %t
; RUN: grep "2: ._G" %t
; PR4152
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index 8178725..a5e28c0 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR4188
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
index 42bf9e9..6e062fb 100644
--- a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
+++ b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR3886
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
diff --git a/test/CodeGen/X86/2009-05-23-available_externally.ll b/test/CodeGen/X86/2009-05-23-available_externally.ll
index f4881ba..94773d9 100644
--- a/test/CodeGen/X86/2009-05-23-available_externally.ll
+++ b/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep atoi | grep PLT
+; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT
; PR4253
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 6f2bef4..8a0b244 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep -E {sar|shl|mov|or} | count 4
+; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4
; Check that the shr (shl X, 56), 48 is not mistakenly turned into
; a shr (X, -8) that gets subsequently "optimized away" as undef
; PR4254
diff --git a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
index 7bdfcb3..2fd42f4 100644
--- a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
+++ b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.tempsym_t = type { i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
index 373f91f..af552d4 100644
--- a/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep {movzbl %\[abcd\]h,}
+; RUN: llc < %s -march=x86-64 | not grep {movzbl %\[abcd\]h,}
define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind {
newFuncRoot:
diff --git a/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
index ea33b16..779f985 100644
--- a/test/CodeGen/X86/2009-06-02-RewriterBug.ll
+++ b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
; PR4225
define void @sha256_block1(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind {
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c628b8a..e6f3008 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
target triple = "x86_64-mingw64"
define x86_fp80 @a(i64 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 33d7972..cb64bf2 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -o %t1 -f
-; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
-; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+; RUN: llc < %s -o %t1
+; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
+; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
target triple = "x86_64-mingw64"
define i32 @a() nounwind {
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index fa90fa9..9415732 100644
--- a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
type { %struct.GAP } ; type %0
type { i16, i8, i8 } ; type %1
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 94df530..336f17e 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
define <8 x i8> @a(i8 zeroext %x) nounwind {
%r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 220423a..5c51480 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
; RUN: grep movzwl %t1 | count 2
; RUN: grep movzbl %t1 | count 2
; RUN: grep movd %t1 | count 4
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
index 2e3f195..8bb3dc6 100644
--- a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
index 589a880..e361804 100644
--- a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
+++ b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; RUN: llc < %s -march=x86 -mattr=+sse
; PR2598
define <2 x float> @a(<2 x i32> %i) nounwind {
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
index a46fd1a..92419fc 100644
--- a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index c3687a5..07ef53e 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep movl | count 2
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
define i64 @a(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
index 001b7fc..673e936 100644
--- a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
+++ b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
; calling convention out of sync with standard C calling convention on x86_64)
diff --git a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
index 095e6a1..feb5780 100644
--- a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
+++ b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | not grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | not grep TAILCALL
; Bug 4396. This tail call can NOT be optimized.
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
index d6ff5b6..228cd48 100644
--- a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
; PR2484
define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {
diff --git a/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
new file mode 100644
index 0000000..fcc71ae
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=x86 -mtriple=x86_64-unknown-freebsd7.2
+; PR4478
+
+ %struct.sockaddr = type <{ i8, i8, [14 x i8] }>
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %sw.bb6, %entry
+ switch i32 undef, label %sw.default [
+ i32 -1, label %while.end
+ i32 119, label %sw.bb6
+ ]
+
+sw.bb6: ; preds = %while.cond
+ br i1 undef, label %if.then, label %while.cond
+
+if.then: ; preds = %sw.bb6
+ ret i32 1
+
+sw.default: ; preds = %while.cond
+ ret i32 1
+
+while.end: ; preds = %while.cond
+ br i1 undef, label %if.then15, label %if.end16
+
+if.then15: ; preds = %while.end
+ ret i32 1
+
+if.end16: ; preds = %while.end
+ br i1 undef, label %lor.lhs.false, label %if.then21
+
+lor.lhs.false: ; preds = %if.end16
+ br i1 undef, label %if.end22, label %if.then21
+
+if.then21: ; preds = %lor.lhs.false, %if.end16
+ ret i32 1
+
+if.end22: ; preds = %lor.lhs.false
+ br i1 undef, label %lor.lhs.false27, label %if.then51
+
+lor.lhs.false27: ; preds = %if.end22
+ br i1 undef, label %lor.lhs.false39, label %if.then51
+
+lor.lhs.false39: ; preds = %lor.lhs.false27
+ br i1 undef, label %if.end52, label %if.then51
+
+if.then51: ; preds = %lor.lhs.false39, %lor.lhs.false27, %if.end22
+ ret i32 1
+
+if.end52: ; preds = %lor.lhs.false39
+ br i1 undef, label %if.then57, label %if.end58
+
+if.then57: ; preds = %if.end52
+ ret i32 1
+
+if.end58: ; preds = %if.end52
+ br i1 undef, label %if.then64, label %if.end65
+
+if.then64: ; preds = %if.end58
+ ret i32 1
+
+if.end65: ; preds = %if.end58
+ br i1 undef, label %if.then71, label %if.end72
+
+if.then71: ; preds = %if.end65
+ ret i32 1
+
+if.end72: ; preds = %if.end65
+ br i1 undef, label %if.then83, label %if.end84
+
+if.then83: ; preds = %if.end72
+ ret i32 1
+
+if.end84: ; preds = %if.end72
+ br i1 undef, label %if.then101, label %if.end102
+
+if.then101: ; preds = %if.end84
+ ret i32 1
+
+if.end102: ; preds = %if.end84
+ br i1 undef, label %if.then113, label %if.end114
+
+if.then113: ; preds = %if.end102
+ ret i32 1
+
+if.end114: ; preds = %if.end102
+ br i1 undef, label %if.then209, label %if.end210
+
+if.then209: ; preds = %if.end114
+ ret i32 1
+
+if.end210: ; preds = %if.end114
+ br i1 undef, label %if.then219, label %if.end220
+
+if.then219: ; preds = %if.end210
+ ret i32 1
+
+if.end220: ; preds = %if.end210
+ br i1 undef, label %if.end243, label %lor.lhs.false230
+
+lor.lhs.false230: ; preds = %if.end220
+ unreachable
+
+if.end243: ; preds = %if.end220
+ br i1 undef, label %if.then249, label %if.end250
+
+if.then249: ; preds = %if.end243
+ ret i32 1
+
+if.end250: ; preds = %if.end243
+ br i1 undef, label %if.end261, label %if.then260
+
+if.then260: ; preds = %if.end250
+ ret i32 1
+
+if.end261: ; preds = %if.end250
+ br i1 undef, label %if.then270, label %if.end271
+
+if.then270: ; preds = %if.end261
+ ret i32 1
+
+if.end271: ; preds = %if.end261
+ %call.i = call i32 @arc4random() nounwind ; <i32> [#uses=1]
+ %rem.i = urem i32 %call.i, 16383 ; <i32> [#uses=1]
+ %rem1.i = trunc i32 %rem.i to i16 ; <i16> [#uses=1]
+ %conv2.i = or i16 %rem1.i, -16384 ; <i16> [#uses=1]
+ %0 = call i16 asm "xchgb ${0:h}, ${0:b}", "=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv2.i) nounwind ; <i16> [#uses=1]
+ store i16 %0, i16* undef
+ %call281 = call i32 @bind(i32 undef, %struct.sockaddr* undef, i32 16) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @bind(i32, %struct.sockaddr*, i32)
+
+declare i32 @arc4random()
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
new file mode 100644
index 0000000..eb9378b
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -disable-mmx
+
+define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {
+ %D = icmp sgt <2 x i32> %A, %B
+ %E = zext <2 x i1> %D to <2 x i32>
+ store <2 x i32> %E, <2 x i32>* %C
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
new file mode 100644
index 0000000..0fdfdcb
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+; PR3037
+
+define void @entry(<4 x i8>* %dest) {
+ %1 = xor <4 x i1> zeroinitializer, < i1 true, i1 true, i1 true, i1 true >
+ %2 = extractelement <4 x i1> %1, i32 3
+ %3 = zext i1 %2 to i8
+ %4 = insertelement <4 x i8> zeroinitializer, i8 %3, i32 3
+ store <4 x i8> %4, <4 x i8>* %dest, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
new file mode 100644
index 0000000..eabaf77
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -0,0 +1,958 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+ %struct.ANY = type { i8* }
+ %struct.AV = type { %struct.XPVAV*, i32, i32 }
+ %struct.CLONE_PARAMS = type { %struct.AV*, i64, %struct.PerlInterpreter* }
+ %struct.CV = type { %struct.XPVCV*, i32, i32 }
+ %struct.DIR = type { i32, i64, i64, i8*, i32, i64, i64, i32, %struct.__darwin_pthread_mutex_t, %struct._telldir* }
+ %struct.GP = type { %struct.SV*, i32, %struct.io*, %struct.CV*, %struct.AV*, %struct.HV*, %struct.GV*, %struct.CV*, i32, i32, i32, i8* }
+ %struct.GV = type { %struct.XPVGV*, i32, i32 }
+ %struct.HE = type { %struct.HE*, %struct.HEK*, %struct.SV* }
+ %struct.HEK = type { i32, i32, [1 x i8] }
+ %struct.HV = type { %struct.XPVHV*, i32, i32 }
+ %struct.MAGIC = type { %struct.MAGIC*, %struct.MGVTBL*, i16, i8, i8, %struct.SV*, i8*, i32 }
+ %struct.MGVTBL = type { i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*, %struct.SV*, i8*, i32)*, i32 (%struct.MAGIC*, %struct.CLONE_PARAMS*)* }
+ %struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8 }
+ %struct.PMOP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8, %struct.OP*, %struct.OP*, %struct.OP*, %struct.OP*, %struct.PMOP*, %struct.REGEXP*, i32, i32, i8, %struct.HV* }
+ %struct.PerlIO_funcs = type { i64, i8*, i64, i32, i64 (%struct.PerlIOl**, i8*, %struct.SV*, %struct.PerlIO_funcs*)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIO_funcs*, %struct.PerlIO_list_t*, i64, i8*, i32, i32, i32, %struct.PerlIOl**, i32, %struct.SV**)*, i64 (%struct.PerlIOl**)*, %struct.SV* (%struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIOl**, %struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i64, i32)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**, i8*, i64)* }
+ %struct.PerlIO_list_t = type { i64, i64, i64, %struct.PerlIO_pair_t* }
+ %struct.PerlIO_pair_t = type { %struct.PerlIO_funcs*, %struct.SV* }
+ %struct.PerlIOl = type { %struct.PerlIOl*, %struct.PerlIO_funcs*, i32 }
+ %struct.PerlInterpreter = type { i8 }
+ %struct.REGEXP = type { i32*, i32*, %struct.regnode*, %struct.reg_substr_data*, i8*, %struct.reg_data*, i8*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, [1 x %struct.regnode] }
+ %struct.SV = type { i8*, i32, i32 }
+ %struct.XPVAV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.SV**, %struct.SV*, i8 }
+ %struct.XPVCV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.HV*, %struct.OP*, %struct.OP*, void (%struct.CV*)*, %struct.ANY, %struct.GV*, i8*, i64, %struct.AV*, %struct.CV*, i16, i32 }
+ %struct.XPVGV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.GP*, i8*, i64, %struct.HV*, i8 }
+ %struct.XPVHV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, i32, %struct.HE*, %struct.PMOP*, i8* }
+ %struct.XPVIO = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.PerlIOl**, %struct.PerlIOl**, %struct.anon, i64, i64, i64, i64, i8*, %struct.GV*, i8*, %struct.GV*, i8*, %struct.GV*, i16, i8, i8 }
+ %struct.__darwin_pthread_mutex_t = type { i64, [56 x i8] }
+ %struct._telldir = type opaque
+ %struct.anon = type { %struct.DIR* }
+ %struct.io = type { %struct.XPVIO*, i32, i32 }
+ %struct.reg_data = type { i32, i8*, [1 x i8*] }
+ %struct.reg_substr_data = type { [3 x %struct.reg_substr_datum] }
+ %struct.reg_substr_datum = type { i32, i32, %struct.SV*, %struct.SV* }
+ %struct.regnode = type { i8, i8, i16 }
+
+define i32 @Perl_yylex() nounwind ssp {
+entry:
+ br i1 undef, label %bb21, label %bb
+
+bb: ; preds = %entry
+ unreachable
+
+bb21: ; preds = %entry
+ switch i32 undef, label %bb103 [
+ i32 1, label %bb101
+ i32 4, label %bb75
+ i32 6, label %bb68
+ i32 7, label %bb67
+ i32 8, label %bb25
+ ]
+
+bb25: ; preds = %bb21
+ ret i32 41
+
+bb67: ; preds = %bb21
+ ret i32 40
+
+bb68: ; preds = %bb21
+ br i1 undef, label %bb69, label %bb70
+
+bb69: ; preds = %bb68
+ ret i32 undef
+
+bb70: ; preds = %bb68
+ unreachable
+
+bb75: ; preds = %bb21
+ unreachable
+
+bb101: ; preds = %bb21
+ unreachable
+
+bb103: ; preds = %bb21
+ switch i32 undef, label %bb104 [
+ i32 0, label %bb126
+ i32 4, label %fake_eof
+ i32 26, label %fake_eof
+ i32 34, label %bb1423
+ i32 36, label %bb1050
+ i32 37, label %bb534
+ i32 39, label %bb1412
+ i32 41, label %bb643
+ i32 44, label %bb544
+ i32 48, label %bb1406
+ i32 49, label %bb1406
+ i32 50, label %bb1406
+ i32 51, label %bb1406
+ i32 52, label %bb1406
+ i32 53, label %bb1406
+ i32 54, label %bb1406
+ i32 55, label %bb1406
+ i32 56, label %bb1406
+ i32 57, label %bb1406
+ i32 59, label %bb639
+ i32 65, label %keylookup
+ i32 66, label %keylookup
+ i32 67, label %keylookup
+ i32 68, label %keylookup
+ i32 69, label %keylookup
+ i32 70, label %keylookup
+ i32 71, label %keylookup
+ i32 72, label %keylookup
+ i32 73, label %keylookup
+ i32 74, label %keylookup
+ i32 75, label %keylookup
+ i32 76, label %keylookup
+ i32 77, label %keylookup
+ i32 78, label %keylookup
+ i32 79, label %keylookup
+ i32 80, label %keylookup
+ i32 81, label %keylookup
+ i32 82, label %keylookup
+ i32 83, label %keylookup
+ i32 84, label %keylookup
+ i32 85, label %keylookup
+ i32 86, label %keylookup
+ i32 87, label %keylookup
+ i32 88, label %keylookup
+ i32 89, label %keylookup
+ i32 90, label %keylookup
+ i32 92, label %bb1455
+ i32 95, label %keylookup
+ i32 96, label %bb1447
+ i32 97, label %keylookup
+ i32 98, label %keylookup
+ i32 99, label %keylookup
+ i32 100, label %keylookup
+ i32 101, label %keylookup
+ i32 102, label %keylookup
+ i32 103, label %keylookup
+ i32 104, label %keylookup
+ i32 105, label %keylookup
+ i32 106, label %keylookup
+ i32 107, label %keylookup
+ i32 108, label %keylookup
+ i32 109, label %keylookup
+ i32 110, label %keylookup
+ i32 111, label %keylookup
+ i32 112, label %keylookup
+ i32 113, label %keylookup
+ i32 114, label %keylookup
+ i32 115, label %keylookup
+ i32 116, label %keylookup
+ i32 117, label %keylookup
+ i32 118, label %keylookup
+ i32 119, label %keylookup
+ i32 120, label %keylookup
+ i32 121, label %keylookup
+ i32 122, label %keylookup
+ i32 126, label %bb544
+ ]
+
+bb104: ; preds = %bb103
+ unreachable
+
+bb126: ; preds = %bb103
+ ret i32 0
+
+fake_eof: ; preds = %bb1841, %bb103, %bb103
+ unreachable
+
+bb534: ; preds = %bb103
+ unreachable
+
+bb544: ; preds = %bb103, %bb103
+ ret i32 undef
+
+bb639: ; preds = %bb103
+ unreachable
+
+bb643: ; preds = %bb103
+ unreachable
+
+bb1050: ; preds = %bb103
+ unreachable
+
+bb1406: ; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+ unreachable
+
+bb1412: ; preds = %bb103
+ unreachable
+
+bb1423: ; preds = %bb103
+ unreachable
+
+bb1447: ; preds = %bb103
+ unreachable
+
+bb1455: ; preds = %bb103
+ unreachable
+
+keylookup: ; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+ br i1 undef, label %bb1498, label %bb1496
+
+bb1496: ; preds = %keylookup
+ br i1 undef, label %bb1498, label %bb1510.preheader
+
+bb1498: ; preds = %bb1496, %keylookup
+ unreachable
+
+bb1510.preheader: ; preds = %bb1496
+ br i1 undef, label %bb1511, label %bb1518
+
+bb1511: ; preds = %bb1510.preheader
+ br label %bb1518
+
+bb1518: ; preds = %bb1511, %bb1510.preheader
+ switch i32 undef, label %bb741.i4285 [
+ i32 95, label %bb744.i4287
+ i32 115, label %bb852.i4394
+ ]
+
+bb741.i4285: ; preds = %bb1518
+ br label %Perl_keyword.exit4735
+
+bb744.i4287: ; preds = %bb1518
+ br label %Perl_keyword.exit4735
+
+bb852.i4394: ; preds = %bb1518
+ br i1 undef, label %bb861.i4404, label %bb856.i4399
+
+bb856.i4399: ; preds = %bb852.i4394
+ br label %Perl_keyword.exit4735
+
+bb861.i4404: ; preds = %bb852.i4394
+ br label %Perl_keyword.exit4735
+
+Perl_keyword.exit4735: ; preds = %bb861.i4404, %bb856.i4399, %bb744.i4287, %bb741.i4285
+ br i1 undef, label %bb1544, label %reserved_word
+
+bb1544: ; preds = %Perl_keyword.exit4735
+ br i1 undef, label %bb1565, label %bb1545
+
+bb1545: ; preds = %bb1544
+ br i1 undef, label %bb1563, label %bb1558
+
+bb1558: ; preds = %bb1545
+ %0 = load %struct.SV** undef ; <%struct.SV*> [#uses=1]
+ %1 = bitcast %struct.SV* %0 to %struct.GV* ; <%struct.GV*> [#uses=5]
+ br i1 undef, label %bb1563, label %bb1559
+
+bb1559: ; preds = %bb1558
+ br i1 undef, label %bb1560, label %bb1563
+
+bb1560: ; preds = %bb1559
+ br i1 undef, label %bb1563, label %bb1561
+
+bb1561: ; preds = %bb1560
+ br i1 undef, label %bb1562, label %bb1563
+
+bb1562: ; preds = %bb1561
+ br label %bb1563
+
+bb1563: ; preds = %bb1562, %bb1561, %bb1560, %bb1559, %bb1558, %bb1545
+ %gv19.3 = phi %struct.GV* [ %1, %bb1562 ], [ undef, %bb1545 ], [ %1, %bb1558 ], [ %1, %bb1559 ], [ %1, %bb1560 ], [ %1, %bb1561 ] ; <%struct.GV*> [#uses=0]
+ br i1 undef, label %bb1565, label %reserved_word
+
+bb1565: ; preds = %bb1563, %bb1544
+ br i1 undef, label %bb1573, label %bb1580
+
+bb1573: ; preds = %bb1565
+ br label %bb1580
+
+bb1580: ; preds = %bb1573, %bb1565
+ br i1 undef, label %bb1595, label %reserved_word
+
+bb1595: ; preds = %bb1580
+ br i1 undef, label %reserved_word, label %bb1597
+
+bb1597: ; preds = %bb1595
+ br i1 undef, label %reserved_word, label %bb1602
+
+bb1602: ; preds = %bb1597
+ br label %reserved_word
+
+reserved_word: ; preds = %bb1602, %bb1597, %bb1595, %bb1580, %bb1563, %Perl_keyword.exit4735
+ switch i32 undef, label %bb2012 [
+ i32 1, label %bb1819
+ i32 2, label %bb1830
+ i32 4, label %bb1841
+ i32 5, label %bb1841
+ i32 8, label %bb1880
+ i32 14, label %bb1894
+ i32 16, label %bb1895
+ i32 17, label %bb1896
+ i32 18, label %bb1897
+ i32 19, label %bb1898
+ i32 20, label %bb1899
+ i32 22, label %bb1906
+ i32 23, label %bb1928
+ i32 24, label %bb2555
+ i32 26, label %bb1929
+ i32 31, label %bb1921
+ i32 32, label %bb1930
+ i32 33, label %bb1905
+ i32 34, label %bb1936
+ i32 35, label %bb1927
+ i32 37, label %bb1962
+ i32 40, label %bb1951
+ i32 41, label %bb1946
+ i32 42, label %bb1968
+ i32 44, label %bb1969
+ i32 45, label %bb1970
+ i32 46, label %bb2011
+ i32 47, label %bb2006
+ i32 48, label %bb2007
+ i32 49, label %bb2009
+ i32 50, label %bb2010
+ i32 51, label %bb2008
+ i32 53, label %bb1971
+ i32 54, label %bb1982
+ i32 55, label %bb2005
+ i32 59, label %bb2081
+ i32 61, label %bb2087
+ i32 64, label %bb2080
+ i32 65, label %really_sub
+ i32 66, label %bb2079
+ i32 67, label %bb2089
+ i32 69, label %bb2155
+ i32 72, label %bb2137
+ i32 74, label %bb2138
+ i32 75, label %bb2166
+ i32 76, label %bb2144
+ i32 78, label %bb2145
+ i32 81, label %bb2102
+ i32 82, label %bb2108
+ i32 84, label %bb2114
+ i32 85, label %bb2115
+ i32 86, label %bb2116
+ i32 89, label %bb2146
+ i32 90, label %bb2147
+ i32 91, label %bb2148
+ i32 93, label %bb2154
+ i32 94, label %bb2167
+ i32 96, label %bb2091
+ i32 97, label %bb2090
+ i32 98, label %bb2088
+ i32 100, label %bb2173
+ i32 101, label %bb2174
+ i32 102, label %bb2175
+ i32 103, label %bb2180
+ i32 104, label %bb2181
+ i32 106, label %bb2187
+ i32 107, label %bb2188
+ i32 110, label %bb2206
+ i32 112, label %bb2217
+ i32 113, label %bb2218
+ i32 114, label %bb2199
+ i32 119, label %bb2205
+ i32 120, label %bb2229
+ i32 121, label %bb2233
+ i32 122, label %bb2234
+ i32 123, label %bb2235
+ i32 124, label %bb2236
+ i32 125, label %bb2237
+ i32 126, label %bb2238
+ i32 127, label %bb2239
+ i32 128, label %bb2268
+ i32 129, label %bb2267
+ i32 133, label %bb2276
+ i32 134, label %bb2348
+ i32 135, label %bb2337
+ i32 137, label %bb2239
+ i32 138, label %bb2367
+ i32 139, label %bb2368
+ i32 140, label %bb2369
+ i32 141, label %bb2357
+ i32 143, label %bb2349
+ i32 144, label %bb2350
+ i32 146, label %bb2356
+ i32 147, label %bb2370
+ i32 148, label %bb2445
+ i32 149, label %bb2453
+ i32 151, label %bb2381
+ i32 152, label %bb2457
+ i32 154, label %bb2516
+ i32 156, label %bb2522
+ i32 158, label %bb2527
+ i32 159, label %bb2537
+ i32 160, label %bb2503
+ i32 162, label %bb2504
+ i32 163, label %bb2464
+ i32 165, label %bb2463
+ i32 166, label %bb2538
+ i32 168, label %bb2515
+ i32 170, label %bb2549
+ i32 172, label %bb2566
+ i32 173, label %bb2595
+ i32 174, label %bb2565
+ i32 175, label %bb2567
+ i32 176, label %bb2568
+ i32 177, label %bb2569
+ i32 178, label %bb2570
+ i32 179, label %bb2594
+ i32 182, label %bb2571
+ i32 183, label %bb2572
+ i32 185, label %bb2593
+ i32 186, label %bb2583
+ i32 187, label %bb2596
+ i32 189, label %bb2602
+ i32 190, label %bb2603
+ i32 191, label %bb2604
+ i32 192, label %bb2605
+ i32 193, label %bb2606
+ i32 196, label %bb2617
+ i32 197, label %bb2618
+ i32 198, label %bb2619
+ i32 199, label %bb2627
+ i32 200, label %bb2625
+ i32 201, label %bb2626
+ i32 206, label %really_sub
+ i32 207, label %bb2648
+ i32 208, label %bb2738
+ i32 209, label %bb2739
+ i32 210, label %bb2740
+ i32 211, label %bb2742
+ i32 212, label %bb2741
+ i32 213, label %bb2737
+ i32 214, label %bb2743
+ i32 217, label %bb2758
+ i32 219, label %bb2764
+ i32 220, label %bb2765
+ i32 221, label %bb2744
+ i32 222, label %bb2766
+ i32 226, label %bb2785
+ i32 227, label %bb2783
+ i32 228, label %bb2784
+ i32 229, label %bb2790
+ i32 230, label %bb2797
+ i32 232, label %bb2782
+ i32 234, label %bb2791
+ i32 236, label %bb2815
+ i32 237, label %bb2818
+ i32 238, label %bb2819
+ i32 239, label %bb2820
+ i32 240, label %bb2817
+ i32 241, label %bb2816
+ i32 242, label %bb2821
+ i32 243, label %bb2826
+ i32 244, label %bb2829
+ i32 245, label %bb2830
+ ]
+
+bb1819: ; preds = %reserved_word
+ unreachable
+
+bb1830: ; preds = %reserved_word
+ unreachable
+
+bb1841: ; preds = %reserved_word, %reserved_word
+ br i1 undef, label %fake_eof, label %bb1842
+
+bb1842: ; preds = %bb1841
+ unreachable
+
+bb1880: ; preds = %reserved_word
+ unreachable
+
+bb1894: ; preds = %reserved_word
+ ret i32 undef
+
+bb1895: ; preds = %reserved_word
+ ret i32 301
+
+bb1896: ; preds = %reserved_word
+ ret i32 undef
+
+bb1897: ; preds = %reserved_word
+ ret i32 undef
+
+bb1898: ; preds = %reserved_word
+ ret i32 undef
+
+bb1899: ; preds = %reserved_word
+ ret i32 undef
+
+bb1905: ; preds = %reserved_word
+ ret i32 278
+
+bb1906: ; preds = %reserved_word
+ unreachable
+
+bb1921: ; preds = %reserved_word
+ ret i32 288
+
+bb1927: ; preds = %reserved_word
+ ret i32 undef
+
+bb1928: ; preds = %reserved_word
+ ret i32 undef
+
+bb1929: ; preds = %reserved_word
+ ret i32 undef
+
+bb1930: ; preds = %reserved_word
+ ret i32 undef
+
+bb1936: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb1937
+
+bb1937: ; preds = %bb1936
+ ret i32 undef
+
+bb1946: ; preds = %reserved_word
+ unreachable
+
+bb1951: ; preds = %reserved_word
+ ret i32 undef
+
+bb1962: ; preds = %reserved_word
+ ret i32 undef
+
+bb1968: ; preds = %reserved_word
+ ret i32 280
+
+bb1969: ; preds = %reserved_word
+ ret i32 276
+
+bb1970: ; preds = %reserved_word
+ ret i32 277
+
+bb1971: ; preds = %reserved_word
+ ret i32 288
+
+bb1982: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb1986
+
+bb1986: ; preds = %bb1982
+ ret i32 undef
+
+bb2005: ; preds = %reserved_word
+ ret i32 undef
+
+bb2006: ; preds = %reserved_word
+ ret i32 282
+
+bb2007: ; preds = %reserved_word
+ ret i32 282
+
+bb2008: ; preds = %reserved_word
+ ret i32 282
+
+bb2009: ; preds = %reserved_word
+ ret i32 282
+
+bb2010: ; preds = %reserved_word
+ ret i32 282
+
+bb2011: ; preds = %reserved_word
+ ret i32 282
+
+bb2012: ; preds = %reserved_word
+ unreachable
+
+bb2079: ; preds = %reserved_word
+ ret i32 undef
+
+bb2080: ; preds = %reserved_word
+ ret i32 282
+
+bb2081: ; preds = %reserved_word
+ ret i32 undef
+
+bb2087: ; preds = %reserved_word
+ ret i32 undef
+
+bb2088: ; preds = %reserved_word
+ ret i32 287
+
+bb2089: ; preds = %reserved_word
+ ret i32 287
+
+bb2090: ; preds = %reserved_word
+ ret i32 undef
+
+bb2091: ; preds = %reserved_word
+ ret i32 280
+
+bb2102: ; preds = %reserved_word
+ ret i32 282
+
+bb2108: ; preds = %reserved_word
+ ret i32 undef
+
+bb2114: ; preds = %reserved_word
+ ret i32 undef
+
+bb2115: ; preds = %reserved_word
+ ret i32 282
+
+bb2116: ; preds = %reserved_word
+ ret i32 282
+
+bb2137: ; preds = %reserved_word
+ ret i32 undef
+
+bb2138: ; preds = %reserved_word
+ ret i32 282
+
+bb2144: ; preds = %reserved_word
+ ret i32 undef
+
+bb2145: ; preds = %reserved_word
+ ret i32 282
+
+bb2146: ; preds = %reserved_word
+ ret i32 undef
+
+bb2147: ; preds = %reserved_word
+ ret i32 undef
+
+bb2148: ; preds = %reserved_word
+ ret i32 282
+
+bb2154: ; preds = %reserved_word
+ ret i32 undef
+
+bb2155: ; preds = %reserved_word
+ ret i32 282
+
+bb2166: ; preds = %reserved_word
+ ret i32 282
+
+bb2167: ; preds = %reserved_word
+ ret i32 undef
+
+bb2173: ; preds = %reserved_word
+ ret i32 274
+
+bb2174: ; preds = %reserved_word
+ ret i32 undef
+
+bb2175: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2176
+
+bb2176: ; preds = %bb2175
+ ret i32 undef
+
+bb2180: ; preds = %reserved_word
+ ret i32 undef
+
+bb2181: ; preds = %reserved_word
+ ret i32 undef
+
+bb2187: ; preds = %reserved_word
+ ret i32 undef
+
+bb2188: ; preds = %reserved_word
+ ret i32 280
+
+bb2199: ; preds = %reserved_word
+ ret i32 295
+
+bb2205: ; preds = %reserved_word
+ ret i32 287
+
+bb2206: ; preds = %reserved_word
+ ret i32 287
+
+bb2217: ; preds = %reserved_word
+ ret i32 undef
+
+bb2218: ; preds = %reserved_word
+ ret i32 undef
+
+bb2229: ; preds = %reserved_word
+ unreachable
+
+bb2233: ; preds = %reserved_word
+ ret i32 undef
+
+bb2234: ; preds = %reserved_word
+ ret i32 undef
+
+bb2235: ; preds = %reserved_word
+ ret i32 undef
+
+bb2236: ; preds = %reserved_word
+ ret i32 undef
+
+bb2237: ; preds = %reserved_word
+ ret i32 undef
+
+bb2238: ; preds = %reserved_word
+ ret i32 undef
+
+bb2239: ; preds = %reserved_word, %reserved_word
+ unreachable
+
+bb2267: ; preds = %reserved_word
+ ret i32 280
+
+bb2268: ; preds = %reserved_word
+ ret i32 288
+
+bb2276: ; preds = %reserved_word
+ unreachable
+
+bb2337: ; preds = %reserved_word
+ ret i32 300
+
+bb2348: ; preds = %reserved_word
+ ret i32 undef
+
+bb2349: ; preds = %reserved_word
+ ret i32 undef
+
+bb2350: ; preds = %reserved_word
+ ret i32 undef
+
+bb2356: ; preds = %reserved_word
+ ret i32 undef
+
+bb2357: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2358
+
+bb2358: ; preds = %bb2357
+ ret i32 undef
+
+bb2367: ; preds = %reserved_word
+ ret i32 undef
+
+bb2368: ; preds = %reserved_word
+ ret i32 270
+
+bb2369: ; preds = %reserved_word
+ ret i32 undef
+
+bb2370: ; preds = %reserved_word
+ unreachable
+
+bb2381: ; preds = %reserved_word
+ unreachable
+
+bb2445: ; preds = %reserved_word
+ unreachable
+
+bb2453: ; preds = %reserved_word
+ unreachable
+
+bb2457: ; preds = %reserved_word
+ unreachable
+
+bb2463: ; preds = %reserved_word
+ ret i32 286
+
+bb2464: ; preds = %reserved_word
+ unreachable
+
+bb2503: ; preds = %reserved_word
+ ret i32 280
+
+bb2504: ; preds = %reserved_word
+ ret i32 undef
+
+bb2515: ; preds = %reserved_word
+ ret i32 undef
+
+bb2516: ; preds = %reserved_word
+ ret i32 undef
+
+bb2522: ; preds = %reserved_word
+ unreachable
+
+bb2527: ; preds = %reserved_word
+ unreachable
+
+bb2537: ; preds = %reserved_word
+ ret i32 undef
+
+bb2538: ; preds = %reserved_word
+ ret i32 undef
+
+bb2549: ; preds = %reserved_word
+ unreachable
+
+bb2555: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2556
+
+bb2556: ; preds = %bb2555
+ ret i32 undef
+
+bb2565: ; preds = %reserved_word
+ ret i32 undef
+
+bb2566: ; preds = %reserved_word
+ ret i32 undef
+
+bb2567: ; preds = %reserved_word
+ ret i32 undef
+
+bb2568: ; preds = %reserved_word
+ ret i32 undef
+
+bb2569: ; preds = %reserved_word
+ ret i32 undef
+
+bb2570: ; preds = %reserved_word
+ ret i32 undef
+
+bb2571: ; preds = %reserved_word
+ ret i32 undef
+
+bb2572: ; preds = %reserved_word
+ ret i32 undef
+
+bb2583: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2584
+
+bb2584: ; preds = %bb2583
+ ret i32 undef
+
+bb2593: ; preds = %reserved_word
+ ret i32 282
+
+bb2594: ; preds = %reserved_word
+ ret i32 282
+
+bb2595: ; preds = %reserved_word
+ ret i32 undef
+
+bb2596: ; preds = %reserved_word
+ ret i32 undef
+
+bb2602: ; preds = %reserved_word
+ ret i32 undef
+
+bb2603: ; preds = %reserved_word
+ ret i32 undef
+
+bb2604: ; preds = %reserved_word
+ ret i32 undef
+
+bb2605: ; preds = %reserved_word
+ ret i32 undef
+
+bb2606: ; preds = %reserved_word
+ ret i32 undef
+
+bb2617: ; preds = %reserved_word
+ ret i32 undef
+
+bb2618: ; preds = %reserved_word
+ ret i32 undef
+
+bb2619: ; preds = %reserved_word
+ unreachable
+
+bb2625: ; preds = %reserved_word
+ ret i32 undef
+
+bb2626: ; preds = %reserved_word
+ ret i32 undef
+
+bb2627: ; preds = %reserved_word
+ ret i32 undef
+
+bb2648: ; preds = %reserved_word
+ ret i32 undef
+
+really_sub: ; preds = %reserved_word, %reserved_word
+ unreachable
+
+bb2737: ; preds = %reserved_word
+ ret i32 undef
+
+bb2738: ; preds = %reserved_word
+ ret i32 undef
+
+bb2739: ; preds = %reserved_word
+ ret i32 undef
+
+bb2740: ; preds = %reserved_word
+ ret i32 undef
+
+bb2741: ; preds = %reserved_word
+ ret i32 undef
+
+bb2742: ; preds = %reserved_word
+ ret i32 undef
+
+bb2743: ; preds = %reserved_word
+ ret i32 undef
+
+bb2744: ; preds = %reserved_word
+ unreachable
+
+bb2758: ; preds = %reserved_word
+ ret i32 undef
+
+bb2764: ; preds = %reserved_word
+ ret i32 282
+
+bb2765: ; preds = %reserved_word
+ ret i32 282
+
+bb2766: ; preds = %reserved_word
+ ret i32 undef
+
+bb2782: ; preds = %reserved_word
+ ret i32 273
+
+bb2783: ; preds = %reserved_word
+ ret i32 275
+
+bb2784: ; preds = %reserved_word
+ ret i32 undef
+
+bb2785: ; preds = %reserved_word
+ br i1 undef, label %bb2834, label %bb2786
+
+bb2786: ; preds = %bb2785
+ ret i32 undef
+
+bb2790: ; preds = %reserved_word
+ ret i32 undef
+
+bb2791: ; preds = %reserved_word
+ ret i32 undef
+
+bb2797: ; preds = %reserved_word
+ ret i32 undef
+
+bb2815: ; preds = %reserved_word
+ ret i32 undef
+
+bb2816: ; preds = %reserved_word
+ ret i32 272
+
+bb2817: ; preds = %reserved_word
+ ret i32 undef
+
+bb2818: ; preds = %reserved_word
+ ret i32 282
+
+bb2819: ; preds = %reserved_word
+ ret i32 undef
+
+bb2820: ; preds = %reserved_word
+ ret i32 282
+
+bb2821: ; preds = %reserved_word
+ unreachable
+
+bb2826: ; preds = %reserved_word
+ unreachable
+
+bb2829: ; preds = %reserved_word
+ ret i32 300
+
+bb2830: ; preds = %reserved_word
+ unreachable
+
+bb2834: ; preds = %bb2785, %bb2583, %bb2555, %bb2357, %bb2175, %bb1982, %bb1936
+ ret i32 283
+}
diff --git a/test/CodeGen/X86/2009-07-16-CoalescerBug.ll b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
new file mode 100644
index 0000000..48af440
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; rdar://7059496
+
+ %struct.brinfo = type <{ %struct.brinfo*, %struct.brinfo*, i8*, i32, i32, i32, i8, i8, i8, i8 }>
+ %struct.cadata = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, %struct.cmatcher*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8 }>
+ %struct.cline = type <{ %struct.cline*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i32, %struct.cline*, %struct.cline*, i32, i32 }>
+ %struct.cmatch = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8, i32*, i32*, i8*, i8*, i32, i32, i32, i32, i16, i8, i8, i16, i8, i8 }>
+ %struct.cmatcher = type <{ i32, i8, i8, i8, i8, %struct.cmatcher*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8 }>
+ %struct.cpattern = type <{ %struct.cpattern*, i32, i8, i8, i8, i8, %union.anon }>
+ %struct.patprog = type <{ i64, i64, i64, i64, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }>
+ %union.anon = type <{ [8 x i8] }>
+
+define i32 @addmatches(%struct.cadata* %dat, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 undef, label %if.else, label %if.then91
+
+if.then91: ; preds = %entry
+ br label %if.end96
+
+if.else: ; preds = %entry
+ br label %if.end96
+
+if.end96: ; preds = %if.else, %if.then91
+ br i1 undef, label %lor.lhs.false, label %if.then105
+
+lor.lhs.false: ; preds = %if.end96
+ br i1 undef, label %if.else139, label %if.then105
+
+if.then105: ; preds = %lor.lhs.false, %if.end96
+ unreachable
+
+if.else139: ; preds = %lor.lhs.false
+ br i1 undef, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %if.else139
+ unreachable
+
+land.end: ; preds = %if.else139
+ br i1 undef, label %land.lhs.true285, label %if.then315
+
+land.lhs.true285: ; preds = %land.end
+ br i1 undef, label %if.end324, label %if.then322
+
+if.then315: ; preds = %land.end
+ unreachable
+
+if.then322: ; preds = %land.lhs.true285
+ unreachable
+
+if.end324: ; preds = %land.lhs.true285
+ br i1 undef, label %if.end384, label %if.then358
+
+if.then358: ; preds = %if.end324
+ unreachable
+
+if.end384: ; preds = %if.end324
+ br i1 undef, label %if.end394, label %land.lhs.true387
+
+land.lhs.true387: ; preds = %if.end384
+ unreachable
+
+if.end394: ; preds = %if.end384
+ br i1 undef, label %if.end498, label %land.lhs.true399
+
+land.lhs.true399: ; preds = %if.end394
+ br i1 undef, label %if.end498, label %if.then406
+
+if.then406: ; preds = %land.lhs.true399
+ unreachable
+
+if.end498: ; preds = %land.lhs.true399, %if.end394
+ br i1 undef, label %if.end514, label %if.then503
+
+if.then503: ; preds = %if.end498
+ unreachable
+
+if.end514: ; preds = %if.end498
+ br i1 undef, label %if.end585, label %if.then520
+
+if.then520: ; preds = %if.end514
+ br i1 undef, label %lor.lhs.false547, label %if.then560
+
+lor.lhs.false547: ; preds = %if.then520
+ unreachable
+
+if.then560: ; preds = %if.then520
+ br i1 undef, label %if.end585, label %land.lhs.true566
+
+land.lhs.true566: ; preds = %if.then560
+ br i1 undef, label %if.end585, label %if.then573
+
+if.then573: ; preds = %land.lhs.true566
+ unreachable
+
+if.end585: ; preds = %land.lhs.true566, %if.then560, %if.end514
+ br i1 undef, label %cond.true593, label %cond.false599
+
+cond.true593: ; preds = %if.end585
+ unreachable
+
+cond.false599: ; preds = %if.end585
+ br i1 undef, label %if.end647, label %if.then621
+
+if.then621: ; preds = %cond.false599
+ br i1 undef, label %cond.true624, label %cond.false630
+
+cond.true624: ; preds = %if.then621
+ br label %if.end647
+
+cond.false630: ; preds = %if.then621
+ unreachable
+
+if.end647: ; preds = %cond.true624, %cond.false599
+ br i1 undef, label %if.end723, label %if.then701
+
+if.then701: ; preds = %if.end647
+ br label %if.end723
+
+if.end723: ; preds = %if.then701, %if.end647
+ br i1 undef, label %if.else1090, label %if.then729
+
+if.then729: ; preds = %if.end723
+ br i1 undef, label %if.end887, label %if.then812
+
+if.then812: ; preds = %if.then729
+ unreachable
+
+if.end887: ; preds = %if.then729
+ br i1 undef, label %if.end972, label %if.then893
+
+if.then893: ; preds = %if.end887
+ br i1 undef, label %if.end919, label %if.then903
+
+if.then903: ; preds = %if.then893
+ unreachable
+
+if.end919: ; preds = %if.then893
+ br label %if.end972
+
+if.end972: ; preds = %if.end919, %if.end887
+ %sline.0 = phi %struct.cline* [ undef, %if.end919 ], [ null, %if.end887 ] ; <%struct.cline*> [#uses=5]
+ %bcs.0 = phi i32 [ undef, %if.end919 ], [ 0, %if.end887 ] ; <i32> [#uses=5]
+ br i1 undef, label %if.end1146, label %land.lhs.true975
+
+land.lhs.true975: ; preds = %if.end972
+ br i1 undef, label %if.end1146, label %if.then980
+
+if.then980: ; preds = %land.lhs.true975
+ br i1 undef, label %cond.false1025, label %cond.false1004
+
+cond.false1004: ; preds = %if.then980
+ unreachable
+
+cond.false1025: ; preds = %if.then980
+ br i1 undef, label %if.end1146, label %if.then1071
+
+if.then1071: ; preds = %cond.false1025
+ br i1 undef, label %if.then1074, label %if.end1081
+
+if.then1074: ; preds = %if.then1071
+ br label %if.end1081
+
+if.end1081: ; preds = %if.then1074, %if.then1071
+ %call1083 = call %struct.patprog* @patcompile(i8* undef, i32 0, i8** null) nounwind ssp ; <%struct.patprog*> [#uses=2]
+ br i1 undef, label %if.end1146, label %if.then1086
+
+if.then1086: ; preds = %if.end1081
+ br label %if.end1146
+
+if.else1090: ; preds = %if.end723
+ br i1 undef, label %if.end1146, label %land.lhs.true1093
+
+land.lhs.true1093: ; preds = %if.else1090
+ br i1 undef, label %if.end1146, label %if.then1098
+
+if.then1098: ; preds = %land.lhs.true1093
+ unreachable
+
+if.end1146: ; preds = %land.lhs.true1093, %if.else1090, %if.then1086, %if.end1081, %cond.false1025, %land.lhs.true975, %if.end972
+ %cp.0 = phi %struct.patprog* [ %call1083, %if.then1086 ], [ null, %if.end972 ], [ null, %land.lhs.true975 ], [ null, %cond.false1025 ], [ %call1083, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ] ; <%struct.patprog*> [#uses=1]
+ %sline.1 = phi %struct.cline* [ %sline.0, %if.then1086 ], [ %sline.0, %if.end972 ], [ %sline.0, %land.lhs.true975 ], [ %sline.0, %cond.false1025 ], [ %sline.0, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ] ; <%struct.cline*> [#uses=1]
+ %bcs.1 = phi i32 [ %bcs.0, %if.then1086 ], [ %bcs.0, %if.end972 ], [ %bcs.0, %land.lhs.true975 ], [ %bcs.0, %cond.false1025 ], [ %bcs.0, %if.end1081 ], [ 0, %if.else1090 ], [ 0, %land.lhs.true1093 ] ; <i32> [#uses=1]
+ br i1 undef, label %if.end1307, label %do.body1270
+
+do.body1270: ; preds = %if.end1146
+ unreachable
+
+if.end1307: ; preds = %if.end1146
+ br i1 undef, label %if.end1318, label %if.then1312
+
+if.then1312: ; preds = %if.end1307
+ unreachable
+
+if.end1318: ; preds = %if.end1307
+ br i1 undef, label %for.cond1330.preheader, label %if.then1323
+
+if.then1323: ; preds = %if.end1318
+ unreachable
+
+for.cond1330.preheader: ; preds = %if.end1318
+ %call1587 = call i8* @comp_match(i8* undef, i8* undef, i8* undef, %struct.patprog* %cp.0, %struct.cline** undef, i32 0, %struct.brinfo** undef, i32 0, %struct.brinfo** undef, i32 %bcs.1, i32* undef) nounwind ssp ; <i8*> [#uses=0]
+ %call1667 = call %struct.cmatch* @add_match_data(i32 0, i8* undef, i8* undef, %struct.cline* undef, i8* undef, i8* null, i8* undef, i8* undef, i8* undef, i8* undef, %struct.cline* null, i8* undef, %struct.cline* %sline.1, i8* undef, i32 undef, i32 undef) ssp ; <%struct.cmatch*> [#uses=0]
+ unreachable
+}
+
+declare %struct.patprog* @patcompile(i8*, i32, i8**) ssp
+
+declare i8* @comp_match(i8*, i8*, i8*, %struct.patprog*, %struct.cline**, i32, %struct.brinfo**, i32, %struct.brinfo**, i32, i32*) ssp
+
+declare %struct.cmatch* @add_match_data(i32, i8*, i8*, %struct.cline*, i8*, i8*, i8*, i8*, i8*, i8*, %struct.cline*, i8*, %struct.cline*, i8*, i32, i32) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
new file mode 100644
index 0000000..e21c892
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK: pavgw LCPI1_4(%rip)
+
+; rdar://7057804
+
+define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp {
+entry:
+ %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1]
+ %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3]
+ %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1]
+ %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4]
+ %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1]
+ %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1]
+ %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170> ; <<8 x i16>> [#uses=1]
+ %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1]
+ %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2]
+ %tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170> ; <<8 x i16>> [#uses=1]
+ %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1]
+ %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1]
+ %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %50 = getelementptr i16* %out8x8, i64 8 ; <i16*> [#uses=1]
+ %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %49, <2 x i64>* %51, align 16
+ %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %53 = getelementptr i16* %out8x8, i64 16 ; <i16*> [#uses=1]
+ %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %52, <2 x i64>* %54, align 16
+ %55 = getelementptr i16* %out8x8, i64 24 ; <i16*> [#uses=1]
+ %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %48, <2 x i64>* %56, align 16
+ %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %58 = getelementptr i16* %out8x8, i64 40 ; <i16*> [#uses=1]
+ %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %57, <2 x i64>* %59, align 16
+ %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %61 = getelementptr i16* %out8x8, i64 48 ; <i16*> [#uses=1]
+ %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %60, <2 x i64>* %62, align 16
+ %63 = getelementptr i16* %out8x8, i64 56 ; <i16*> [#uses=1]
+ %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %22, <2 x i64>* %64, align 16
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
new file mode 100644
index 0000000..3e5bd34
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
+; PR4552
+
+target triple = "i386-pc-linux-gnu"
+@g_8 = internal global i32 0 ; <i32*> [#uses=1]
+@g_72 = internal global i32 0 ; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
+entry:
+ %g_72.promoted = load i32* @g_72 ; <i32> [#uses=1]
+ %g_8.promoted = load i32* @g_8 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %func_40.exit, %entry
+ %g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ] ; <i32> [#uses=3]
+ %g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ] ; <i32> [#uses=3]
+ %retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8 ; <i8> [#uses=2]
+ %0 = trunc i32 %g_72.tmp.1 to i8 ; <i8> [#uses=2]
+ %1 = mul i8 %retval12.i4.i.i, %0 ; <i8> [#uses=1]
+ %2 = icmp eq i8 %1, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb2.i.i, label %bb.i.i
+
+bb.i.i: ; preds = %bb
+ %3 = sext i8 %0 to i32 ; <i32> [#uses=1]
+ %4 = and i32 %3, 50295 ; <i32> [#uses=1]
+ %5 = icmp eq i32 %4, 0 ; <i1> [#uses=1]
+ br i1 %5, label %bb2.i.i, label %func_55.exit.i
+
+bb2.i.i: ; preds = %bb.i.i, %bb
+ br label %func_55.exit.i
+
+func_55.exit.i: ; preds = %bb2.i.i, %bb.i.i
+ %g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1]
+ %6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1]
+ %7 = trunc i32 %6 to i8 ; <i8> [#uses=2]
+ %8 = mul i8 %7, %retval12.i4.i.i ; <i8> [#uses=1]
+ %9 = icmp eq i8 %8, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb2.i4.i, label %bb.i3.i
+
+bb.i3.i: ; preds = %func_55.exit.i
+ %10 = sext i8 %7 to i32 ; <i32> [#uses=1]
+ %11 = and i32 %10, 50295 ; <i32> [#uses=1]
+ %12 = icmp eq i32 %11, 0 ; <i1> [#uses=1]
+ br i1 %12, label %bb2.i4.i, label %func_40.exit
+
+bb2.i4.i: ; preds = %bb.i3.i, %func_55.exit.i
+ br label %func_40.exit
+
+func_40.exit: ; preds = %bb2.i4.i, %bb.i3.i
+ %g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ] ; <i32> [#uses=1]
+ %phitmp = icmp sgt i32 %g_8.tmp.1, 0 ; <i1> [#uses=1]
+ %g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1 ; <i32> [#uses=1]
+ br label %bb
+}
diff --git a/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
new file mode 100644
index 0000000..a0095ab
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64
+; PR4583
+
+define i32 @atomic_cmpset_long(i64* %dst, i64 %exp, i64 %src) nounwind ssp noredzone noimplicitfloat {
+entry:
+ %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* undef, i64 undef, i64 undef, i64* undef) nounwind ; <i8> [#uses=0]
+ br label %1
+
+; <label>:1 ; preds = %entry
+ ret i32 undef
+}
diff --git a/test/CodeGen/X86/2009-07-20-CoalescerBug.ll b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
new file mode 100644
index 0000000..e99edd6
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; PR4587
+; rdar://7072590
+
+ %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+
+define fastcc i32 @regex_compile(i8* %pattern, i64 %size, i64 %syntax, %struct.re_pattern_buffer* nocapture %bufp) nounwind ssp {
+entry:
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %entry
+ %tmp35 = getelementptr %struct.re_pattern_buffer* %bufp, i64 0, i32 3 ; <i64*> [#uses=1]
+ store i64 %syntax, i64* %tmp35
+ store i32 undef, i32* undef
+ br i1 undef, label %if.then66, label %if.end102
+
+if.then66: ; preds = %if.end
+ br i1 false, label %if.else, label %if.then70
+
+if.then70: ; preds = %if.then66
+ %call74 = call i8* @xrealloc(i8* undef, i64 32) nounwind ssp ; <i8*> [#uses=0]
+ unreachable
+
+if.else: ; preds = %if.then66
+ br i1 false, label %do.body86, label %if.end99
+
+do.body86: ; preds = %if.else
+ br i1 false, label %do.end, label %if.then90
+
+if.then90: ; preds = %do.body86
+ unreachable
+
+do.end: ; preds = %do.body86
+ ret i32 12
+
+if.end99: ; preds = %if.else
+ br label %if.end102
+
+if.end102: ; preds = %if.end99, %if.end
+ br label %while.body
+
+while.body: ; preds = %if.end1126, %sw.bb532, %while.body, %if.end102
+ %laststart.2 = phi i8* [ null, %if.end102 ], [ %laststart.7.ph, %if.end1126 ], [ %laststart.2, %sw.bb532 ], [ %laststart.2, %while.body ] ; <i8*> [#uses=6]
+ %b.1 = phi i8* [ undef, %if.end102 ], [ %ctg29688, %if.end1126 ], [ %b.1, %sw.bb532 ], [ %b.1, %while.body ] ; <i8*> [#uses=5]
+ br i1 undef, label %while.body, label %if.end127
+
+if.end127: ; preds = %while.body
+ switch i32 undef, label %sw.bb532 [
+ i32 123, label %handle_interval
+ i32 92, label %do.body3527
+ ]
+
+sw.bb532: ; preds = %if.end127
+ br i1 undef, label %while.body, label %if.end808
+
+if.end808: ; preds = %sw.bb532
+ br i1 undef, label %while.cond1267.preheader, label %if.then811
+
+while.cond1267.preheader: ; preds = %if.end808
+ br i1 false, label %return, label %if.end1294
+
+if.then811: ; preds = %if.end808
+ %call817 = call fastcc i8* @skip_one_char(i8* %laststart.2) ssp ; <i8*> [#uses=0]
+ br i1 undef, label %cond.end834, label %lor.lhs.false827
+
+lor.lhs.false827: ; preds = %if.then811
+ br label %cond.end834
+
+cond.end834: ; preds = %lor.lhs.false827, %if.then811
+ br i1 undef, label %land.lhs.true838, label %while.cond979.preheader
+
+land.lhs.true838: ; preds = %cond.end834
+ br i1 undef, label %if.then842, label %while.cond979.preheader
+
+if.then842: ; preds = %land.lhs.true838
+ %conv851 = trunc i64 undef to i32 ; <i32> [#uses=1]
+ br label %while.cond979.preheader
+
+while.cond979.preheader: ; preds = %if.then842, %land.lhs.true838, %cond.end834
+ %startoffset.0.ph = phi i32 [ 0, %cond.end834 ], [ 0, %land.lhs.true838 ], [ %conv851, %if.then842 ] ; <i32> [#uses=2]
+ %laststart.7.ph = phi i8* [ %laststart.2, %cond.end834 ], [ %laststart.2, %land.lhs.true838 ], [ %laststart.2, %if.then842 ] ; <i8*> [#uses=3]
+ %b.4.ph = phi i8* [ %b.1, %cond.end834 ], [ %b.1, %land.lhs.true838 ], [ %b.1, %if.then842 ] ; <i8*> [#uses=3]
+ %ctg29688 = getelementptr i8* %b.4.ph, i64 6 ; <i8*> [#uses=1]
+ br label %while.cond979
+
+while.cond979: ; preds = %if.end1006, %while.cond979.preheader
+ %cmp991 = icmp ugt i64 undef, 0 ; <i1> [#uses=1]
+ br i1 %cmp991, label %do.body994, label %while.end1088
+
+do.body994: ; preds = %while.cond979
+ br i1 undef, label %return, label %if.end1006
+
+if.end1006: ; preds = %do.body994
+ %cmp1014 = icmp ugt i64 undef, 32768 ; <i1> [#uses=1]
+ %storemerge10953 = select i1 %cmp1014, i64 32768, i64 undef ; <i64> [#uses=1]
+ store i64 %storemerge10953, i64* undef
+ br i1 false, label %return, label %while.cond979
+
+while.end1088: ; preds = %while.cond979
+ br i1 undef, label %if.then1091, label %if.else1101
+
+if.then1091: ; preds = %while.end1088
+ store i8 undef, i8* undef
+ %idx.ext1132.pre = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1]
+ %add.ptr1133.pre = getelementptr i8* %laststart.7.ph, i64 %idx.ext1132.pre ; <i8*> [#uses=1]
+ %sub.ptr.lhs.cast1135.pre = ptrtoint i8* %add.ptr1133.pre to i64 ; <i64> [#uses=1]
+ br label %if.end1126
+
+if.else1101: ; preds = %while.end1088
+ %cond1109 = select i1 undef, i32 18, i32 14 ; <i32> [#uses=1]
+ %idx.ext1112 = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1]
+ %add.ptr1113 = getelementptr i8* %laststart.7.ph, i64 %idx.ext1112 ; <i8*> [#uses=2]
+ %sub.ptr.rhs.cast1121 = ptrtoint i8* %add.ptr1113 to i64 ; <i64> [#uses=1]
+ call fastcc void @insert_op1(i32 %cond1109, i8* %add.ptr1113, i32 undef, i8* %b.4.ph) ssp
+ br label %if.end1126
+
+if.end1126: ; preds = %if.else1101, %if.then1091
+ %sub.ptr.lhs.cast1135.pre-phi = phi i64 [ %sub.ptr.rhs.cast1121, %if.else1101 ], [ %sub.ptr.lhs.cast1135.pre, %if.then1091 ] ; <i64> [#uses=1]
+ %add.ptr1128 = getelementptr i8* %b.4.ph, i64 3 ; <i8*> [#uses=1]
+ %sub.ptr.rhs.cast1136 = ptrtoint i8* %add.ptr1128 to i64 ; <i64> [#uses=1]
+ %sub.ptr.sub1137 = sub i64 %sub.ptr.lhs.cast1135.pre-phi, %sub.ptr.rhs.cast1136 ; <i64> [#uses=1]
+ %sub.ptr.sub11378527 = trunc i64 %sub.ptr.sub1137 to i32 ; <i32> [#uses=1]
+ %conv1139 = add i32 %sub.ptr.sub11378527, -3 ; <i32> [#uses=1]
+ store i8 undef, i8* undef
+ %shr10.i8599 = lshr i32 %conv1139, 8 ; <i32> [#uses=1]
+ %conv6.i8600 = trunc i32 %shr10.i8599 to i8 ; <i8> [#uses=1]
+ store i8 %conv6.i8600, i8* undef
+ br label %while.body
+
+if.end1294: ; preds = %while.cond1267.preheader
+ ret i32 12
+
+do.body3527: ; preds = %if.end127
+ br i1 undef, label %do.end3536, label %if.then3531
+
+if.then3531: ; preds = %do.body3527
+ unreachable
+
+do.end3536: ; preds = %do.body3527
+ ret i32 5
+
+handle_interval: ; preds = %if.end127
+ br i1 undef, label %do.body4547, label %cond.false4583
+
+do.body4547: ; preds = %handle_interval
+ br i1 undef, label %do.end4556, label %if.then4551
+
+if.then4551: ; preds = %do.body4547
+ unreachable
+
+do.end4556: ; preds = %do.body4547
+ ret i32 9
+
+cond.false4583: ; preds = %handle_interval
+ unreachable
+
+return: ; preds = %if.end1006, %do.body994, %while.cond1267.preheader, %entry
+ ret i32 undef
+}
+
+declare i8* @xrealloc(i8*, i64) ssp
+
+declare fastcc i8* @skip_one_char(i8*) nounwind readonly ssp
+
+declare fastcc void @insert_op1(i32, i8*, i32, i8*) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
new file mode 100644
index 0000000..e83b3a7
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86
+
+@bsBuff = internal global i32 0 ; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @bsGetUInt32 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc i32 @bsGetUInt32() nounwind ssp {
+entry:
+ %bsBuff.promoted44 = load i32* @bsBuff ; <i32> [#uses=1]
+ %0 = add i32 0, -8 ; <i32> [#uses=1]
+ %1 = lshr i32 %bsBuff.promoted44, %0 ; <i32> [#uses=1]
+ %2 = shl i32 %1, 8 ; <i32> [#uses=1]
+ br label %bb3.i17
+
+bb3.i9: ; preds = %bb3.i17
+ br i1 false, label %bb2.i16, label %bb1.i15
+
+bb1.i15: ; preds = %bb3.i9
+ unreachable
+
+bb2.i16: ; preds = %bb3.i9
+ br label %bb3.i17
+
+bb3.i17: ; preds = %bb2.i16, %entry
+ br i1 false, label %bb3.i9, label %bsR.exit18
+
+bsR.exit18: ; preds = %bb3.i17
+ %3 = or i32 0, %2 ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
new file mode 100644
index 0000000..b9b09a3
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; PR4669
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+
+define <1 x i64> @test(i64 %t) {
+entry:
+ %t1 = insertelement <1 x i64> undef, i64 %t, i32 0
+ %t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
+ ret <1 x i64> %t2
+}
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
new file mode 100644
index 0000000..b329c91
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O3
+; PR4626
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_3 = common global i8 0, align 1 ; <i8*> [#uses=2]
+
+define signext i8 @safe_mul_func_int16_t_s_s(i32 %_si1, i8 signext %_si2) nounwind readnone {
+entry:
+ %tobool = icmp eq i32 %_si1, 0 ; <i1> [#uses=1]
+ %cmp = icmp sgt i8 %_si2, 0 ; <i1> [#uses=2]
+ %or.cond = or i1 %cmp, %tobool ; <i1> [#uses=1]
+ br i1 %or.cond, label %lor.rhs, label %land.lhs.true3
+
+land.lhs.true3: ; preds = %entry
+ %conv5 = sext i8 %_si2 to i32 ; <i32> [#uses=1]
+ %cmp7 = icmp slt i32 %conv5, %_si1 ; <i1> [#uses=1]
+ br i1 %cmp7, label %cond.end, label %lor.rhs
+
+lor.rhs: ; preds = %land.lhs.true3, %entry
+ %cmp10.not = icmp slt i32 %_si1, 1 ; <i1> [#uses=1]
+ %or.cond23 = and i1 %cmp, %cmp10.not ; <i1> [#uses=1]
+ br i1 %or.cond23, label %lor.end, label %cond.false
+
+lor.end: ; preds = %lor.rhs
+ %tobool19 = icmp ne i8 %_si2, 0 ; <i1> [#uses=2]
+ %lor.ext = zext i1 %tobool19 to i32 ; <i32> [#uses=1]
+ br i1 %tobool19, label %cond.end, label %cond.false
+
+cond.false: ; preds = %lor.end, %lor.rhs
+ %conv21 = sext i8 %_si2 to i32 ; <i32> [#uses=1]
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %lor.end, %land.lhs.true3
+ %cond = phi i32 [ %conv21, %cond.false ], [ 1, %land.lhs.true3 ], [ %lor.ext, %lor.end ] ; <i32> [#uses=1]
+ %conv22 = trunc i32 %cond to i8 ; <i8> [#uses=1]
+ ret i8 %conv22
+}
+
+define i32 @func_34(i8 signext %p_35) nounwind readonly {
+entry:
+ %tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false: ; preds = %entry
+ %tmp1 = load i8* @g_3 ; <i8> [#uses=1]
+ %tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tobool3, label %return, label %if.then
+
+if.then: ; preds = %lor.lhs.false, %entry
+ %tmp4 = load i8* @g_3 ; <i8> [#uses=1]
+ %conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+
+return: ; preds = %lor.lhs.false
+ ret i32 0
+}
+
+define void @foo(i32 %p_5) noreturn nounwind {
+entry:
+ %cmp = icmp sgt i32 %p_5, 0 ; <i1> [#uses=2]
+ %call = tail call i32 @safe() nounwind ; <i32> [#uses=1]
+ %conv1 = trunc i32 %call to i8 ; <i8> [#uses=3]
+ %tobool.i = xor i1 %cmp, true ; <i1> [#uses=3]
+ %cmp.i = icmp sgt i8 %conv1, 0 ; <i1> [#uses=3]
+ %or.cond.i = or i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i: ; preds = %entry
+ %xor = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %conv5.i = sext i8 %conv1 to i32 ; <i32> [#uses=1]
+ %cmp7.i = icmp slt i32 %conv5.i, %xor ; <i1> [#uses=1]
+ %cmp7.i.not = xor i1 %cmp7.i, true ; <i1> [#uses=1]
+ %or.cond23.i = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ %or.cond = and i1 %cmp7.i.not, %or.cond23.i ; <i1> [#uses=1]
+ br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i: ; preds = %entry
+ %or.cond23.i.old = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i: ; preds = %lor.rhs.i, %land.lhs.true3.i
+ %tobool19.i = icmp eq i8 %conv1, 0 ; <i1> [#uses=0]
+ br label %for.inc
+
+for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+ br label %for.inc
+}
+
+declare i32 @safe()
+
+define i32 @func_35(i8 signext %p_35) nounwind readonly {
+entry:
+ %tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false: ; preds = %entry
+ %tmp1 = load i8* @g_3 ; <i8> [#uses=1]
+ %tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tobool3, label %return, label %if.then
+
+if.then: ; preds = %lor.lhs.false, %entry
+ %tmp4 = load i8* @g_3 ; <i8> [#uses=1]
+ %conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+
+return: ; preds = %lor.lhs.false
+ ret i32 0
+}
+
+define void @bar(i32 %p_5) noreturn nounwind {
+entry:
+ %cmp = icmp sgt i32 %p_5, 0 ; <i1> [#uses=2]
+ %call = tail call i32 @safe() nounwind ; <i32> [#uses=1]
+ %conv1 = trunc i32 %call to i8 ; <i8> [#uses=3]
+ %tobool.i = xor i1 %cmp, true ; <i1> [#uses=3]
+ %cmp.i = icmp sgt i8 %conv1, 0 ; <i1> [#uses=3]
+ %or.cond.i = or i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i: ; preds = %entry
+ %xor = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %conv5.i = sext i8 %conv1 to i32 ; <i32> [#uses=1]
+ %cmp7.i = icmp slt i32 %conv5.i, %xor ; <i1> [#uses=1]
+ %cmp7.i.not = xor i1 %cmp7.i, true ; <i1> [#uses=1]
+ %or.cond23.i = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ %or.cond = and i1 %cmp7.i.not, %or.cond23.i ; <i1> [#uses=1]
+ br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i: ; preds = %entry
+ %or.cond23.i.old = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1]
+ br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i: ; preds = %lor.rhs.i, %land.lhs.true3.i
+ %tobool19.i = icmp eq i8 %conv1, 0 ; <i1> [#uses=0]
+ br label %for.inc
+
+for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+ br label %for.inc
+}
+
+declare i32 @safe()
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
new file mode 100644
index 0000000..cc2f3d8
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s
+; PR4668
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @x(i32 %qscale) nounwind {
+entry:
+ %temp_block = alloca [64 x i16], align 16 ; <[64 x i16]*> [#uses=0]
+ %tmp = call i32 asm sideeffect "xor %edx, %edx", "={dx},~{dirflag},~{fpsr},~{flags}"() nounwind ; <i32> [#uses=1]
+ br i1 undef, label %if.end78, label %if.then28
+
+if.then28: ; preds = %entry
+ br label %if.end78
+
+if.end78: ; preds = %if.then28, %entry
+ %level.1 = phi i32 [ %tmp, %if.then28 ], [ 0, %entry ] ; <i32> [#uses=1]
+ %add.ptr1 = getelementptr [64 x i16]* null, i32 0, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr2 = getelementptr [64 x i16]* null, i32 1, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr3 = getelementptr [64 x i16]* null, i32 2, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr4 = getelementptr [64 x i16]* null, i32 3, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr5 = getelementptr [64 x i16]* null, i32 4, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr6 = getelementptr [64 x i16]* null, i32 5, i32 %qscale ; <i16*> [#uses=1]
+ %tmp1 = call i32 asm sideeffect "nop", "={ax},r,r,r,r,r,0,~{dirflag},~{fpsr},~{flags}"(i16* %add.ptr6, i16* %add.ptr5, i16* %add.ptr4, i16* %add.ptr3, i16* %add.ptr2, i16* %add.ptr1) nounwind ; <i32> [#uses=0]
+ ret i32 %level.1
+}
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
new file mode 100644
index 0000000..9456d91
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-mingw64 | grep movabsq
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define <4 x float> @RecursiveTestFunc1(i8*) {
+EntryBlock:
+ %1 = call <4 x float> inttoptr (i64 5367207198 to <4 x float> (i8*, float, float, float, float)*)(i8* %0, float 8.000000e+00, float 5.000000e+00, float 3.000000e+00, float 4.000000e+00) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
diff --git a/test/CodeGen/X86/2009-08-12-badswitch.ll b/test/CodeGen/X86/2009-08-12-badswitch.ll
new file mode 100644
index 0000000..a94fce0
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-12-badswitch.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s | grep LJT
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10"
+
+declare void @f1() nounwind readnone
+declare void @f2() nounwind readnone
+declare void @f3() nounwind readnone
+declare void @f4() nounwind readnone
+declare void @f5() nounwind readnone
+declare void @f6() nounwind readnone
+declare void @f7() nounwind readnone
+declare void @f8() nounwind readnone
+declare void @f9() nounwind readnone
+declare void @f10() nounwind readnone
+declare void @f11() nounwind readnone
+declare void @f12() nounwind readnone
+declare void @f13() nounwind readnone
+declare void @f14() nounwind readnone
+declare void @f15() nounwind readnone
+declare void @f16() nounwind readnone
+declare void @f17() nounwind readnone
+declare void @f18() nounwind readnone
+declare void @f19() nounwind readnone
+declare void @f20() nounwind readnone
+declare void @f21() nounwind readnone
+declare void @f22() nounwind readnone
+declare void @f23() nounwind readnone
+declare void @f24() nounwind readnone
+declare void @f25() nounwind readnone
+declare void @f26() nounwind readnone
+
+define internal fastcc i32 @foo(i64 %bar) nounwind ssp {
+entry:
+ br label %bb49
+
+bb49:
+ switch i64 %bar, label %RETURN [
+ i64 2, label %RRETURN_2
+ i64 3, label %RRETURN_6
+ i64 4, label %RRETURN_7
+ i64 5, label %RRETURN_14
+ i64 6, label %RRETURN_15
+ i64 7, label %RRETURN_16
+ i64 8, label %RRETURN_17
+ i64 9, label %RRETURN_18
+ i64 10, label %RRETURN_19
+ i64 11, label %RRETURN_20
+ i64 12, label %RRETURN_21
+ i64 13, label %RRETURN_22
+ i64 14, label %RRETURN_24
+ i64 15, label %RRETURN_26
+ i64 16, label %RRETURN_27
+ i64 17, label %RRETURN_28
+ i64 18, label %RRETURN_29
+ i64 19, label %RRETURN_30
+ i64 20, label %RRETURN_31
+ i64 21, label %RRETURN_38
+ i64 22, label %RRETURN_40
+ i64 23, label %RRETURN_42
+ i64 24, label %RRETURN_44
+ i64 25, label %RRETURN_48
+ i64 26, label %RRETURN_52
+ i64 27, label %RRETURN_1
+ ]
+
+RETURN:
+ call void @f1()
+ br label %EXIT
+
+RRETURN_2: ; preds = %bb49
+ call void @f2()
+ br label %EXIT
+
+RRETURN_6: ; preds = %bb49
+ call void @f2()
+ br label %EXIT
+
+RRETURN_7: ; preds = %bb49
+ call void @f3()
+ br label %EXIT
+
+RRETURN_14: ; preds = %bb49
+ call void @f4()
+ br label %EXIT
+
+RRETURN_15: ; preds = %bb49
+ call void @f5()
+ br label %EXIT
+
+RRETURN_16: ; preds = %bb49
+ call void @f6()
+ br label %EXIT
+
+RRETURN_17: ; preds = %bb49
+ call void @f7()
+ br label %EXIT
+
+RRETURN_18: ; preds = %bb49
+ call void @f8()
+ br label %EXIT
+
+RRETURN_19: ; preds = %bb49
+ call void @f9()
+ br label %EXIT
+
+RRETURN_20: ; preds = %bb49
+ call void @f10()
+ br label %EXIT
+
+RRETURN_21: ; preds = %bb49
+ call void @f11()
+ br label %EXIT
+
+RRETURN_22: ; preds = %bb49
+ call void @f12()
+ br label %EXIT
+
+RRETURN_24: ; preds = %bb49
+ call void @f13()
+ br label %EXIT
+
+RRETURN_26: ; preds = %bb49
+ call void @f14()
+ br label %EXIT
+
+RRETURN_27: ; preds = %bb49
+ call void @f15()
+ br label %EXIT
+
+RRETURN_28: ; preds = %bb49
+ call void @f16()
+ br label %EXIT
+
+RRETURN_29: ; preds = %bb49
+ call void @f17()
+ br label %EXIT
+
+RRETURN_30: ; preds = %bb49
+ call void @f18()
+ br label %EXIT
+
+RRETURN_31: ; preds = %bb49
+ call void @f19()
+ br label %EXIT
+
+RRETURN_38: ; preds = %bb49
+ call void @f20()
+ br label %EXIT
+
+RRETURN_40: ; preds = %bb49
+ call void @f21()
+ br label %EXIT
+
+RRETURN_42: ; preds = %bb49
+ call void @f22()
+ br label %EXIT
+
+RRETURN_44: ; preds = %bb49
+ call void @f23()
+ br label %EXIT
+
+RRETURN_48: ; preds = %bb49
+ call void @f24()
+ br label %EXIT
+
+RRETURN_52: ; preds = %bb49
+ call void @f25()
+ br label %EXIT
+
+RRETURN_1: ; preds = %bb49
+ call void @f26()
+ br label %EXIT
+
+EXIT:
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
new file mode 100644
index 0000000..6b0d6d9
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s
+target triple = "x86_64-mingw"
+
+; ModuleID = 'mm.bc'
+ type opaque ; type %0
+ type opaque ; type %1
+
+define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly {
+indexCheckBlock:
+ %indexCmp = icmp ugt i32 %index, 16 ; <i1> [#uses=1]
+ br i1 %indexCmp, label %zeroReturnBlock, label %primitiveTextureFetchBlock
+
+primitiveTextureFetchBlock: ; preds = %indexCheckBlock
+ %pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8* ; <i8*> [#uses=1]
+ %pointerArithmeticTmp1 = getelementptr i8* %pointerArithmeticTmp, i64 1808 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1** ; <%1**> [#uses=1]
+ %primitivePtr = load %1** %pointerArithmeticTmp2 ; <%1*> [#uses=1]
+ %pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8* ; <i8*> [#uses=1]
+ %pointerArithmeticTmp4 = getelementptr i8* %pointerArithmeticTmp3, i64 19408 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1** ; <%1**> [#uses=1]
+ %primitiveTexturePtr = getelementptr %1** %pointerArithmeticTmp5, i32 %index ; <%1**> [#uses=1]
+ %primitiveTexturePtr6 = load %1** %primitiveTexturePtr ; <%1*> [#uses=2]
+ br label %textureCheckBlock
+
+textureCheckBlock: ; preds = %primitiveTextureFetchBlock
+ %texturePtrInt = ptrtoint %1* %primitiveTexturePtr6 to i64 ; <i64> [#uses=1]
+ %testTextureNULL = icmp eq i64 %texturePtrInt, 0 ; <i1> [#uses=1]
+ br i1 %testTextureNULL, label %zeroReturnBlock, label %rhoCalculateBlock
+
+rhoCalculateBlock: ; preds = %textureCheckBlock
+ %pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8* ; <i8*> [#uses=1]
+ %pointerArithmeticTmp8 = getelementptr i8* %pointerArithmeticTmp7, i64 640 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %dimensionsPtr = load <4 x float>* %pointerArithmeticTmp9, align 1 ; <<4 x float>> [#uses=2]
+ %texDiffDX = fsub <4 x float> %texCoordDX, %texCoord ; <<4 x float>> [#uses=1]
+ %texDiffDY = fsub <4 x float> %texCoordDY, %texCoord ; <<4 x float>> [#uses=1]
+ %ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr ; <<4 x float>> [#uses=2]
+ %ddx10 = fmul <4 x float> %texDiffDY, %dimensionsPtr ; <<4 x float>> [#uses=2]
+ %ddxSquared = fmul <4 x float> %ddx, %ddx ; <<4 x float>> [#uses=3]
+ %0 = shufflevector <4 x float> %ddxSquared, <4 x float> %ddxSquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; <<4 x float>> [#uses=1]
+ %dxSquared = fadd <4 x float> %ddxSquared, %0 ; <<4 x float>> [#uses=1]
+ %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dxSquared) ; <<4 x float>> [#uses=1]
+ %ddySquared = fmul <4 x float> %ddx10, %ddx10 ; <<4 x float>> [#uses=3]
+ %2 = shufflevector <4 x float> %ddySquared, <4 x float> %ddySquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; <<4 x float>> [#uses=1]
+ %dySquared = fadd <4 x float> %ddySquared, %2 ; <<4 x float>> [#uses=1]
+ %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dySquared) ; <<4 x float>> [#uses=1]
+ %4 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %3) ; <<4 x float>> [#uses=1]
+ %rho = extractelement <4 x float> %4, i32 0 ; <float> [#uses=1]
+ ret float %rho
+
+zeroReturnBlock: ; preds = %textureCheckBlock, %indexCheckBlock
+ ret float 0.000000e+00
+}
+
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
new file mode 100644
index 0000000..5f6cf3b
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-pc-linux | FileCheck %s
+
+@a = external global i96, align 4
+@b = external global i64, align 8
+
+define void @c() nounwind {
+; CHECK: movl a+8, %eax
+ %srcval1 = load i96* @a, align 4
+ %sroa.store.elt2 = lshr i96 %srcval1, 64
+ %tmp = trunc i96 %sroa.store.elt2 to i64
+; CHECK: movl %eax, b
+; CHECK: movl $0, b+4
+ store i64 %tmp, i64* @b, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
new file mode 100644
index 0000000..790fd88
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86
+; PR4753
+
+; This function exercises a case where a sub-register reuse is undone.
+
+@uint8 = external global i32 ; <i32*> [#uses=3]
+
+declare signext i8 @foo(i32, i8 signext) nounwind readnone
+
+declare signext i8 @bar(i32, i8 signext) nounwind readnone
+
+define i32 @uint80(i8 signext %p_52) nounwind {
+entry:
+ %0 = sext i8 %p_52 to i16 ; <i16> [#uses=1]
+ %1 = tail call i32 @func_24(i16 zeroext %0, i8 signext ptrtoint (i8 (i32, i8)* @foo to i8)) nounwind; <i32> [#uses=1]
+ %2 = trunc i32 %1 to i8 ; <i8> [#uses=1]
+ %3 = or i8 %2, 1 ; <i8> [#uses=1]
+ %4 = tail call i32 @safe(i32 1) nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 @func_24(i16 zeroext 0, i8 signext undef) nounwind; <i32> [#uses=1]
+ %6 = trunc i32 %5 to i8 ; <i8> [#uses=1]
+ %7 = xor i8 %3, %p_52 ; <i8> [#uses=1]
+ %8 = xor i8 %7, %6 ; <i8> [#uses=1]
+ %9 = icmp ne i8 %p_52, 0 ; <i1> [#uses=1]
+ %10 = zext i1 %9 to i8 ; <i8> [#uses=1]
+ %11 = tail call i32 @func_24(i16 zeroext ptrtoint (i8 (i32, i8)* @bar to i16), i8 signext %10) nounwind; <i32> [#uses=1]
+ %12 = tail call i32 @func_24(i16 zeroext 0, i8 signext 1) nounwind; <i32> [#uses=0]
+ br i1 undef, label %bb2, label %bb
+
+bb: ; preds = %entry
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %bb, %entry
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ %iftmp.2.0 = phi i32 [ 0, %bb2 ], [ 1, %bb ] ; <i32> [#uses=1]
+ %13 = icmp ne i32 %11, %iftmp.2.0 ; <i1> [#uses=1]
+ %14 = tail call i32 @safe(i32 -2) nounwind ; <i32> [#uses=0]
+ %15 = zext i1 %13 to i8 ; <i8> [#uses=1]
+ %16 = tail call signext i8 @func_53(i8 signext undef, i8 signext 1, i8 signext %15, i8 signext %8) nounwind; <i8> [#uses=0]
+ br i1 undef, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ %17 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb3
+ %18 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %19 = sext i8 undef to i16 ; <i16> [#uses=1]
+ %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
+ br i1 undef, label %return, label %bb6.preheader
+
+bb6.preheader: ; preds = %bb5
+ %21 = sext i8 %p_52 to i32 ; <i32> [#uses=1]
+ %22 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
+ unreachable
+
+return: ; preds = %bb5
+ ret i32 undef
+}
+
+declare i32 @func_24(i16 zeroext, i8 signext)
+
+declare i32 @safe(i32)
+
+declare signext i8 @func_53(i8 signext, i8 signext, i8 signext, i8 signext)
+
+declare i32 @safefuncts(...)
diff --git a/test/CodeGen/X86/2009-08-23-linkerprivate.ll b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000..3da8f00
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
new file mode 100644
index 0000000..55432be
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s
+; PR4689
+
+%struct.__s = type { [8 x i8] }
+%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* }
+%struct.pcpu = type { i32*, i32*, i32*, i32*, %struct.pcb*, i64, i32, i32, i32, i32 }
+
+define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat {
+; CHECK: hammer_time:
+; CHECK: movq $Xrsvd, %rax
+; CHECK: movq $Xrsvd, %rdi
+; CHECK: movq $Xrsvd, %r8
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ br label %for.body
+
+for.body: ; preds = %for.inc, %if.end
+ switch i32 undef, label %if.then76 [
+ i32 9, label %for.inc
+ i32 10, label %for.inc
+ i32 11, label %for.inc
+ i32 12, label %for.inc
+ ]
+
+if.then76: ; preds = %for.body
+ unreachable
+
+for.inc: ; preds = %for.body, %for.body, %for.body, %for.body
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind
+ br label %for.body170
+
+for.body170: ; preds = %for.body170, %for.end
+ store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef
+ br i1 undef, label %for.end175, label %for.body170
+
+for.end175: ; preds = %for.body170
+ unreachable
+}
+
+declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
new file mode 100644
index 0000000..9e58872
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+; It's not legal to fold a load from 32-bit stack slot into a 64-bit
+; instruction. If done, the instruction does a 64-bit load and that's not
+; safe. This can happen we a subreg_to_reg 0 has been coalesced. One
+; exception is when the instruction that folds the load is a move, then we
+; can simply turn it into a 32-bit load from the stack slot.
+; rdar://7170444
+
+%struct.ComplexType = type { i32 }
+
+define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp {
+entry:
+; CHECK: _t:
+; CHECK: movl 16(%rbp),
+; CHECK: movl 16(%rbp), %edx
+ %0 = zext i32 %argumentsLength to i64 ; <i64> [#uses=1]
+ %1 = zext i32 %clientPort to i64 ; <i64> [#uses=1]
+ %2 = inttoptr i64 %1 to %struct.ComplexType* ; <%struct.ComplexType*> [#uses=1]
+ %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID)
+ to label %invcont unwind label %lpad ; <i8*> [#uses=1]
+
+invcont: ; preds = %entry
+ %4 = add i32 %requestID, %pluginID ; <i32> [#uses=0]
+ %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef)
+ to label %invcont1 unwind label %lpad ; <i8> [#uses=0]
+
+invcont1: ; preds = %invcont
+ %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
+ to label %invcont2 unwind label %lpad
+
+invcont2: ; preds = %invcont1
+ ret i32 0
+
+lpad: ; preds = %invcont1, %invcont, %entry
+ %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @vm_deallocate(i32, i64, i64)
+
+declare i8* @pluginInstance(i8*, i32)
+
+declare zeroext i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*)
+
+declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32)
diff --git a/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
new file mode 100644
index 0000000..18b5a17
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10
+; PR4910
+
+%0 = type { i32, i32, i32, i32 }
+
+@boot_cpu_id = external global i32 ; <i32*> [#uses=1]
+@cpu_logical = common global i32 0, align 4 ; <i32*> [#uses=1]
+
+define void @topo_probe_0xb() nounwind ssp {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc38, %entry
+ %0 = phi i32 [ 0, %entry ], [ %inc40, %for.inc38 ] ; <i32> [#uses=3]
+ %cmp = icmp slt i32 %0, 3 ; <i1> [#uses=1]
+ br i1 %cmp, label %for.body, label %for.end41
+
+for.body: ; preds = %for.cond
+ %1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,{cx},~{dirflag},~{fpsr},~{flags}"(i32 11, i32 %0) nounwind ; <%0> [#uses=3]
+ %asmresult.i = extractvalue %0 %1, 0 ; <i32> [#uses=1]
+ %asmresult10.i = extractvalue %0 %1, 2 ; <i32> [#uses=1]
+ %and = and i32 %asmresult.i, 31 ; <i32> [#uses=2]
+ %shr42 = lshr i32 %asmresult10.i, 8 ; <i32> [#uses=1]
+ %and12 = and i32 %shr42, 255 ; <i32> [#uses=2]
+ %cmp14 = icmp eq i32 %and12, 0 ; <i1> [#uses=1]
+ br i1 %cmp14, label %for.end41, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %for.body
+ %asmresult9.i = extractvalue %0 %1, 1 ; <i32> [#uses=1]
+ %and7 = and i32 %asmresult9.i, 65535 ; <i32> [#uses=1]
+ %cmp16 = icmp eq i32 %and7, 0 ; <i1> [#uses=1]
+ br i1 %cmp16, label %for.end41, label %for.cond17.preheader
+
+for.cond17.preheader: ; preds = %lor.lhs.false
+ %tmp24 = load i32* @boot_cpu_id ; <i32> [#uses=1]
+ %shr26 = ashr i32 %tmp24, %and ; <i32> [#uses=1]
+ br label %for.body20
+
+for.body20: ; preds = %for.body20, %for.cond17.preheader
+ %2 = phi i32 [ 0, %for.cond17.preheader ], [ %inc32, %for.body20 ] ; <i32> [#uses=2]
+ %cnt.143 = phi i32 [ 0, %for.cond17.preheader ], [ %inc.cnt.1, %for.body20 ] ; <i32> [#uses=1]
+ %shr23 = ashr i32 %2, %and ; <i32> [#uses=1]
+ %cmp27 = icmp eq i32 %shr23, %shr26 ; <i1> [#uses=1]
+ %inc = zext i1 %cmp27 to i32 ; <i32> [#uses=1]
+ %inc.cnt.1 = add i32 %inc, %cnt.143 ; <i32> [#uses=2]
+ %inc32 = add nsw i32 %2, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %inc32, 255 ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body20
+
+for.end: ; preds = %for.body20
+ %cmp34 = icmp eq i32 %and12, 1 ; <i1> [#uses=1]
+ br i1 %cmp34, label %if.then35, label %for.inc38
+
+if.then35: ; preds = %for.end
+ store i32 %inc.cnt.1, i32* @cpu_logical
+ br label %for.inc38
+
+for.inc38: ; preds = %for.end, %if.then35
+ %inc40 = add nsw i32 %0, 1 ; <i32> [#uses=1]
+ br label %for.cond
+
+for.end41: ; preds = %lor.lhs.false, %for.body, %for.cond
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
new file mode 100644
index 0000000..646806e
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+
+; PR4958
+
+define i32 @main() nounwind ssp {
+entry:
+; CHECK: main:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ br label %bb
+
+bb: ; preds = %bb1, %entry
+; CHECK: movl %e
+; CHECK-NEXT: addl $1
+; CHECK-NEXT: movl %e
+; CHECK-NEXT: adcl $0
+ %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ] ; <i64> [#uses=1]
+ %0 = add nsw i64 %i.0, 1 ; <i64> [#uses=2]
+ %1 = icmp sgt i32 0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %2 = icmp sle i64 %0, 1 ; <i1> [#uses=1]
+ br i1 %2, label %bb, label %bb2
+
+bb2: ; preds = %bb1, %bb
+ br label %return
+
+return: ; preds = %bb2
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
new file mode 100644
index 0000000..4f44cae
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '4964.c'
+; PR 4964
+; Registers other than RAX, RCX are OK, but they must be different.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+ type { i64, i64 } ; type %0
+
+define i64 @flsst(i64 %find) nounwind ssp {
+entry:
+; CHECK: FOO %rax %rcx
+ %asmtmp = tail call %0 asm sideeffect "FOO $0 $1 $2", "=r,=&r,rm,~{dirflag},~{fpsr},~{flags},~{cc}"(i64 %find) nounwind ; <%0> [#uses=1]
+ %asmresult = extractvalue %0 %asmtmp, 0 ; <i64> [#uses=1]
+ ret i64 %asmresult
+}
diff --git a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
new file mode 100644
index 0000000..80b8835
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
+
+define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
+; CHECK: dot:
+; CHECK: decl %
+; CHECK-NEXT: jne
+entry:
+ %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb2
+
+bb: ; preds = %bb, %entry
+ %i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+ %sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ] ; <i32> [#uses=1]
+ %1 = mul i32 %i.03, %As ; <i32> [#uses=1]
+ %2 = getelementptr i16* %A, i32 %1 ; <i16*> [#uses=1]
+ %3 = load i16* %2, align 2 ; <i16> [#uses=1]
+ %4 = sext i16 %3 to i32 ; <i32> [#uses=1]
+ %5 = mul i32 %i.03, %Bs ; <i32> [#uses=1]
+ %6 = getelementptr i16* %B, i32 %5 ; <i16*> [#uses=1]
+ %7 = load i16* %6, align 2 ; <i16> [#uses=1]
+ %8 = sext i16 %7 to i32 ; <i32> [#uses=1]
+ %9 = mul i32 %8, %4 ; <i32> [#uses=1]
+ %10 = add i32 %9, %sum.04 ; <i32> [#uses=2]
+ %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
+
+bb1.bb2_crit_edge: ; preds = %bb
+ %phitmp = trunc i32 %10 to i16 ; <i16> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %entry, %bb1.bb2_crit_edge
+ %sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i16> [#uses=1]
+ store i16 %sum.0.lcssa, i16* %C, align 2
+ ret void
+}
diff --git a/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
new file mode 100644
index 0000000..33f35f8
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ ret i32 3
+
+bb1: ; preds = %entry
+ br i1 undef, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ ret i32 3
+
+bb3: ; preds = %bb1
+ br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+bb.i18: ; preds = %bb.i18, %bb3
+ br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+quantum_getwidth.exit: ; preds = %bb.i18, %bb3
+ br i1 undef, label %bb4, label %bb6.preheader
+
+bb4: ; preds = %quantum_getwidth.exit
+ unreachable
+
+bb6.preheader: ; preds = %quantum_getwidth.exit
+ br i1 undef, label %bb.i1, label %bb1.i2
+
+bb.i1: ; preds = %bb6.preheader
+ unreachable
+
+bb1.i2: ; preds = %bb6.preheader
+ br i1 undef, label %bb2.i, label %bb3.i4
+
+bb2.i: ; preds = %bb1.i2
+ unreachable
+
+bb3.i4: ; preds = %bb1.i2
+ br i1 undef, label %quantum_new_qureg.exit, label %bb4.i
+
+bb4.i: ; preds = %bb3.i4
+ unreachable
+
+quantum_new_qureg.exit: ; preds = %bb3.i4
+ br i1 undef, label %bb9, label %bb11.thread
+
+bb11.thread: ; preds = %quantum_new_qureg.exit
+ %.cast.i = zext i32 undef to i64 ; <i64> [#uses=1]
+ br label %bb.i37
+
+bb9: ; preds = %quantum_new_qureg.exit
+ unreachable
+
+bb.i37: ; preds = %bb.i37, %bb11.thread
+ %0 = load i64* undef, align 8 ; <i64> [#uses=1]
+ %1 = shl i64 %0, %.cast.i ; <i64> [#uses=1]
+ store i64 %1, i64* undef, align 8
+ br i1 undef, label %bb.i37, label %quantum_addscratch.exit
+
+quantum_addscratch.exit: ; preds = %bb.i37
+ br i1 undef, label %bb12.preheader, label %bb14
+
+bb12.preheader: ; preds = %quantum_addscratch.exit
+ unreachable
+
+bb14: ; preds = %quantum_addscratch.exit
+ br i1 undef, label %bb17, label %bb.nph
+
+bb.nph: ; preds = %bb14
+ unreachable
+
+bb17: ; preds = %bb14
+ br i1 undef, label %bb1.i7, label %quantum_measure.exit
+
+bb1.i7: ; preds = %bb17
+ br label %quantum_measure.exit
+
+quantum_measure.exit: ; preds = %bb1.i7, %bb17
+ switch i32 undef, label %bb21 [
+ i32 -1, label %bb18
+ i32 0, label %bb20
+ ]
+
+bb18: ; preds = %quantum_measure.exit
+ unreachable
+
+bb20: ; preds = %quantum_measure.exit
+ unreachable
+
+bb21: ; preds = %quantum_measure.exit
+ br i1 undef, label %quantum_frac_approx.exit, label %bb1.i
+
+bb1.i: ; preds = %bb21
+ unreachable
+
+quantum_frac_approx.exit: ; preds = %bb21
+ br i1 undef, label %bb25, label %bb26
+
+bb25: ; preds = %quantum_frac_approx.exit
+ unreachable
+
+bb26: ; preds = %quantum_frac_approx.exit
+ br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+bb.i: ; preds = %bb.i, %bb26
+ br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+quantum_gcd.exit: ; preds = %bb.i, %bb26
+ br i1 undef, label %bb32, label %bb33
+
+bb32: ; preds = %quantum_gcd.exit
+ br i1 undef, label %bb.i.i, label %quantum_delete_qureg.exit
+
+bb.i.i: ; preds = %bb32
+ ret i32 0
+
+quantum_delete_qureg.exit: ; preds = %bb32
+ ret i32 0
+
+bb33: ; preds = %quantum_gcd.exit
+ unreachable
+}
diff --git a/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 0000000..d37d4b8
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+; rdar://7247745
+
+%struct._lck_mtx_ = type { %union.anon }
+%struct._lck_rw_t_internal_ = type <{ i16, i8, i8, i32, i32, i32 }>
+%struct.anon = type { i64, i64, [2 x i8], i8, i8, i32 }
+%struct.memory_object = type { i32, i32, %struct.memory_object_pager_ops* }
+%struct.memory_object_control = type { i32, i32, %struct.vm_object* }
+%struct.memory_object_pager_ops = type { void (%struct.memory_object*)*, void (%struct.memory_object*)*, i32 (%struct.memory_object*, %struct.memory_object_control*, i32)*, i32 (%struct.memory_object*)*, i32 (%struct.memory_object*, i64, i32, i32, i32*)*, i32 (%struct.memory_object*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.memory_object*, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i32)*, i32 (%struct.memory_object*)*, i8* }
+%struct.queue_entry = type { %struct.queue_entry*, %struct.queue_entry* }
+%struct.upl = type { %struct._lck_mtx_, i32, i32, %struct.vm_object*, i64, i32, i64, %struct.vm_object*, i32, i8* }
+%struct.upl_page_info = type <{ i32, i8, [3 x i8] }>
+%struct.vm_object = type { %struct.queue_entry, %struct._lck_rw_t_internal_, i64, %struct.vm_page*, i32, i32, i32, i32, %struct.vm_object*, %struct.vm_object*, i64, %struct.memory_object*, i64, %struct.memory_object_control*, i32, i16, i16, [2 x i8], i8, i8, %struct.queue_entry, %struct.queue_entry, i64, i32, i32, i32, i8*, i64, i8, i8, [2 x i8], %struct.queue_entry }
+%struct.vm_page = type { %struct.queue_entry, %struct.queue_entry, %struct.vm_page*, %struct.vm_object*, i64, [2 x i8], i8, i8, i32, i8, i8, i8, i8, i32 }
+%union.anon = type { %struct.anon }
+
+declare i64 @OSAddAtomic64(i64, i64*) noredzone noimplicitfloat
+
+define i32 @upl_commit_range(%struct.upl* %upl, i32 %offset, i32 %size, i32 %flags, %struct.upl_page_info* %page_list, i32 %count, i32* nocapture %empty) nounwind noredzone noimplicitfloat {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.end: ; preds = %entry
+ br i1 undef, label %if.end143, label %if.then136
+
+if.then136: ; preds = %if.end
+ unreachable
+
+if.end143: ; preds = %if.end
+ br i1 undef, label %if.else155, label %if.then153
+
+if.then153: ; preds = %if.end143
+ br label %while.cond
+
+if.else155: ; preds = %if.end143
+ unreachable
+
+while.cond: ; preds = %if.end1039, %if.then153
+ br i1 undef, label %if.then1138, label %while.body
+
+while.body: ; preds = %while.cond
+ br i1 undef, label %if.end260, label %if.then217
+
+if.then217: ; preds = %while.body
+ br i1 undef, label %if.end260, label %if.then230
+
+if.then230: ; preds = %if.then217
+ br i1 undef, label %if.then246, label %if.end260
+
+if.then246: ; preds = %if.then230
+ br label %if.end260
+
+if.end260: ; preds = %if.then246, %if.then230, %if.then217, %while.body
+ br i1 undef, label %if.end296, label %if.then266
+
+if.then266: ; preds = %if.end260
+ unreachable
+
+if.end296: ; preds = %if.end260
+ br i1 undef, label %if.end1039, label %if.end306
+
+if.end306: ; preds = %if.end296
+ br i1 undef, label %if.end796, label %if.then616
+
+if.then616: ; preds = %if.end306
+ br i1 undef, label %commit_next_page, label %do.body716
+
+do.body716: ; preds = %if.then616
+ %call721 = call i64 @OSAddAtomic64(i64 1, i64* undef) nounwind noredzone noimplicitfloat ; <i64> [#uses=0]
+ call void asm sideeffect "movq\090x0($0),%rdi\0A\09movq\090x8($0),%rsi\0A\09.section __DATA, __data\0A\09.globl __dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec\0A\09__dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec:.quad 1f\0A\09.text\0A\091:nop\0A\09nop\0A\09nop\0A\09", "r,~{memory},~{di},~{si},~{dirflag},~{fpsr},~{flags}"(i64* undef) nounwind
+ br label %commit_next_page
+
+if.end796: ; preds = %if.end306
+ unreachable
+
+commit_next_page: ; preds = %do.body716, %if.then616
+ br i1 undef, label %if.end1039, label %if.then1034
+
+if.then1034: ; preds = %commit_next_page
+ br label %if.end1039
+
+if.end1039: ; preds = %if.then1034, %commit_next_page, %if.end296
+ br label %while.cond
+
+if.then1138: ; preds = %while.cond
+ unreachable
+
+if.then: ; preds = %entry
+ ret i32 4
+}
diff --git a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
new file mode 100644
index 0000000..ef10ae5
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
@@ -0,0 +1,264 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 1
+; rdar://7274692
+
+%0 = type { [125 x i32] }
+%1 = type { i32 }
+%struct..5sPragmaType = type { i8*, i32 }
+%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+%struct.AuxData = type { i8*, void (i8*)* }
+%struct.Bitvec = type { i32, i32, i32, %0 }
+%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+%struct.Context = type { i64, i32, %struct.Fifo }
+%struct.CountCtx = type { i64 }
+%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
+%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct..5sPragmaType, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct..5sPragmaType, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct..5sPragmaType, i32, i64 }
+%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
+%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
+%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
+%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
+%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
+%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
+%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+%struct._OvflCell = type { i8*, i16 }
+%struct._RuneCharClass = type { [14 x i8], i32 }
+%struct._RuneEntry = type { i32, i32, i32, i32* }
+%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
+%struct._RuneRange = type { i32, %struct._RuneEntry* }
+%struct.__sFILEX = type opaque
+%struct._ht = type { i32, %struct.HashElem* }
+%struct.callback_data = type { %struct.sqlite3*, i32, i32, %struct.FILE*, i32, i32, i32, i8*, [20 x i8], [100 x i32], [100 x i32], [20 x i8], %struct.previous_mode_data, [1024 x i8], i8* }
+%struct.previous_mode_data = type { i32, i32, i32, [100 x i32] }
+%struct.sColMap = type { i32, i8* }
+%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %union.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+%struct.sqlite3InitInfo = type { i32, i32, i8 }
+%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+%struct.sqlite3_mutex = type opaque
+%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+%union.anon = type { double }
+
+@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=2]
+@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1]
+@.str10 = internal constant [16 x i8] c"Out of memory!\0A\00", align 1 ; <[16 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.callback_data*, i8*)* @set_table_name to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
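+; Descriptive note (added for readability; the function below appears to be
+; reduced from sqlite3's shell.c): set_table_name() frees any previously
+; stored name in the callback_data, then returns early if zName is null.
+; Otherwise it scans zName with isalpha()/isalnum(), deciding whether the
+; name needs quoting (it is not a plain identifier) and counting embedded
+; single quotes, mallocs a buffer of the required size (printing
+; "Out of memory!" and calling exit(1) if the allocation fails), and copies
+; zName into it, doubling embedded quotes and wrapping the result in quotes
+; when needed, before NUL-terminating and storing the pointer back.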
+define fastcc void @set_table_name(%struct.callback_data* nocapture %p, i8* %zName) nounwind ssp {
+entry:
+ %0 = getelementptr inbounds %struct.callback_data* %p, i32 0, i32 7 ; <i8**> [#uses=3]
+ %1 = load i8** %0, align 4 ; <i8*> [#uses=2]
+ %2 = icmp eq i8* %1, null ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb
+
+bb: ; preds = %entry
+ free i8* %1
+ store i8* null, i8** %0, align 4
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %3 = icmp eq i8* %zName, null ; <i1> [#uses=1]
+ br i1 %3, label %return, label %bb2
+
+bb2: ; preds = %bb1
+ %4 = load i8* %zName, align 1 ; <i8> [#uses=2]
+ %5 = zext i8 %4 to i32 ; <i32> [#uses=2]
+ %6 = icmp sgt i8 %4, -1 ; <i1> [#uses=1]
+ br i1 %6, label %bb.i.i, label %bb1.i.i
+
+bb.i.i: ; preds = %bb2
+ %7 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %5 ; <i32*> [#uses=1]
+ %8 = load i32* %7, align 4 ; <i32> [#uses=1]
+ %9 = and i32 %8, 256 ; <i32> [#uses=1]
+ br label %isalpha.exit
+
+bb1.i.i: ; preds = %bb2
+ %10 = tail call i32 @__maskrune(i32 %5, i32 256) nounwind ; <i32> [#uses=1]
+ br label %isalpha.exit
+
+isalpha.exit: ; preds = %bb1.i.i, %bb.i.i
+ %storemerge.in.in.i.i = phi i32 [ %9, %bb.i.i ], [ %10, %bb1.i.i ] ; <i32> [#uses=1]
+ %storemerge.in.i.i = icmp eq i32 %storemerge.in.in.i.i, 0 ; <i1> [#uses=1]
+ br i1 %storemerge.in.i.i, label %bb3, label %bb5
+
+bb3: ; preds = %isalpha.exit
+ %11 = load i8* %zName, align 1 ; <i8> [#uses=2]
+ %12 = icmp eq i8 %11, 95 ; <i1> [#uses=1]
+ br i1 %12, label %bb5, label %bb12.preheader
+
+bb5: ; preds = %bb3, %isalpha.exit
+ %.pre = load i8* %zName, align 1 ; <i8> [#uses=1]
+ br label %bb12.preheader
+
+bb12.preheader: ; preds = %bb5, %bb3
+ %13 = phi i8 [ %.pre, %bb5 ], [ %11, %bb3 ] ; <i8> [#uses=1]
+ %needQuote.1.ph = phi i32 [ 0, %bb5 ], [ 1, %bb3 ] ; <i32> [#uses=2]
+ %14 = icmp eq i8 %13, 0 ; <i1> [#uses=1]
+ br i1 %14, label %bb13, label %bb7
+
+bb7: ; preds = %bb11, %bb12.preheader
+ %i.011 = phi i32 [ %tmp17, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=2]
+ %n.110 = phi i32 [ %26, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=3]
+ %needQuote.19 = phi i32 [ %needQuote.0, %bb11 ], [ %needQuote.1.ph, %bb12.preheader ] ; <i32> [#uses=2]
+ %scevgep16 = getelementptr i8* %zName, i32 %i.011 ; <i8*> [#uses=2]
+ %tmp17 = add i32 %i.011, 1 ; <i32> [#uses=2]
+ %scevgep18 = getelementptr i8* %zName, i32 %tmp17 ; <i8*> [#uses=1]
+ %15 = load i8* %scevgep16, align 1 ; <i8> [#uses=2]
+ %16 = zext i8 %15 to i32 ; <i32> [#uses=2]
+ %17 = icmp sgt i8 %15, -1 ; <i1> [#uses=1]
+ br i1 %17, label %bb.i.i2, label %bb1.i.i3
+
+bb.i.i2: ; preds = %bb7
+ %18 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %16 ; <i32*> [#uses=1]
+ %19 = load i32* %18, align 4 ; <i32> [#uses=1]
+ %20 = and i32 %19, 1280 ; <i32> [#uses=1]
+ br label %isalnum.exit
+
+bb1.i.i3: ; preds = %bb7
+ %21 = tail call i32 @__maskrune(i32 %16, i32 1280) nounwind ; <i32> [#uses=1]
+ br label %isalnum.exit
+
+isalnum.exit: ; preds = %bb1.i.i3, %bb.i.i2
+ %storemerge.in.in.i.i4 = phi i32 [ %20, %bb.i.i2 ], [ %21, %bb1.i.i3 ] ; <i32> [#uses=1]
+ %storemerge.in.i.i5 = icmp eq i32 %storemerge.in.in.i.i4, 0 ; <i1> [#uses=1]
+ br i1 %storemerge.in.i.i5, label %bb8, label %bb11
+
+bb8: ; preds = %isalnum.exit
+ %22 = load i8* %scevgep16, align 1 ; <i8> [#uses=2]
+ %23 = icmp eq i8 %22, 95 ; <i1> [#uses=1]
+ br i1 %23, label %bb11, label %bb9
+
+bb9: ; preds = %bb8
+ %24 = icmp eq i8 %22, 39 ; <i1> [#uses=1]
+ %25 = zext i1 %24 to i32 ; <i32> [#uses=1]
+ %.n.1 = add i32 %n.110, %25 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb8, %isalnum.exit
+ %needQuote.0 = phi i32 [ 1, %bb9 ], [ %needQuote.19, %isalnum.exit ], [ %needQuote.19, %bb8 ] ; <i32> [#uses=2]
+ %n.0 = phi i32 [ %.n.1, %bb9 ], [ %n.110, %isalnum.exit ], [ %n.110, %bb8 ] ; <i32> [#uses=1]
+ %26 = add nsw i32 %n.0, 1 ; <i32> [#uses=2]
+ %27 = load i8* %scevgep18, align 1 ; <i8> [#uses=1]
+ %28 = icmp eq i8 %27, 0 ; <i1> [#uses=1]
+ br i1 %28, label %bb13, label %bb7
+
+bb13: ; preds = %bb11, %bb12.preheader
+ %n.1.lcssa = phi i32 [ 0, %bb12.preheader ], [ %26, %bb11 ] ; <i32> [#uses=2]
+ %needQuote.1.lcssa = phi i32 [ %needQuote.1.ph, %bb12.preheader ], [ %needQuote.0, %bb11 ] ; <i32> [#uses=1]
+ %29 = add nsw i32 %n.1.lcssa, 2 ; <i32> [#uses=1]
+ %30 = icmp eq i32 %needQuote.1.lcssa, 0 ; <i1> [#uses=3]
+ %n.1. = select i1 %30, i32 %n.1.lcssa, i32 %29 ; <i32> [#uses=1]
+ %31 = add nsw i32 %n.1., 1 ; <i32> [#uses=1]
+ %32 = malloc i8, i32 %31 ; <i8*> [#uses=7]
+ store i8* %32, i8** %0, align 4
+ %33 = icmp eq i8* %32, null ; <i1> [#uses=1]
+ br i1 %33, label %bb16, label %bb17
+
+bb16: ; preds = %bb13
+ %34 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
+ %35 = bitcast %struct.FILE* %34 to i8* ; <i8*> [#uses=1]
+ %36 = tail call i32 @"\01_fwrite$UNIX2003"(i8* getelementptr inbounds ([16 x i8]* @.str10, i32 0, i32 0), i32 1, i32 15, i8* %35) nounwind ; <i32> [#uses=0]
+ tail call void @exit(i32 1) noreturn nounwind
+ unreachable
+
+bb17: ; preds = %bb13
+ br i1 %30, label %bb23.preheader, label %bb18
+
+bb18: ; preds = %bb17
+ store i8 39, i8* %32, align 4
+ br label %bb23.preheader
+
+bb23.preheader: ; preds = %bb18, %bb17
+ %n.3.ph = phi i32 [ 1, %bb18 ], [ 0, %bb17 ] ; <i32> [#uses=2]
+ %37 = load i8* %zName, align 1 ; <i8> [#uses=1]
+ %38 = icmp eq i8 %37, 0 ; <i1> [#uses=1]
+ br i1 %38, label %bb24, label %bb20
+
+bb20: ; preds = %bb22, %bb23.preheader
+ %storemerge18 = phi i32 [ %tmp, %bb22 ], [ 0, %bb23.preheader ] ; <i32> [#uses=2]
+ %n.37 = phi i32 [ %n.4, %bb22 ], [ %n.3.ph, %bb23.preheader ] ; <i32> [#uses=3]
+ %scevgep = getelementptr i8* %zName, i32 %storemerge18 ; <i8*> [#uses=1]
+ %tmp = add i32 %storemerge18, 1 ; <i32> [#uses=2]
+ %scevgep15 = getelementptr i8* %zName, i32 %tmp ; <i8*> [#uses=1]
+ %39 = load i8* %scevgep, align 1 ; <i8> [#uses=2]
+ %40 = getelementptr inbounds i8* %32, i32 %n.37 ; <i8*> [#uses=1]
+ store i8 %39, i8* %40, align 1
+ %41 = add nsw i32 %n.37, 1 ; <i32> [#uses=2]
+ %42 = icmp eq i8 %39, 39 ; <i1> [#uses=1]
+ br i1 %42, label %bb21, label %bb22
+
+bb21: ; preds = %bb20
+ %43 = getelementptr inbounds i8* %32, i32 %41 ; <i8*> [#uses=1]
+ store i8 39, i8* %43, align 1
+ %44 = add nsw i32 %n.37, 2 ; <i32> [#uses=1]
+ br label %bb22
+
+bb22: ; preds = %bb21, %bb20
+ %n.4 = phi i32 [ %44, %bb21 ], [ %41, %bb20 ] ; <i32> [#uses=2]
+ %45 = load i8* %scevgep15, align 1 ; <i8> [#uses=1]
+ %46 = icmp eq i8 %45, 0 ; <i1> [#uses=1]
+ br i1 %46, label %bb24, label %bb20
+
+bb24: ; preds = %bb22, %bb23.preheader
+ %n.3.lcssa = phi i32 [ %n.3.ph, %bb23.preheader ], [ %n.4, %bb22 ] ; <i32> [#uses=3]
+ br i1 %30, label %bb26, label %bb25
+
+bb25: ; preds = %bb24
+ %47 = getelementptr inbounds i8* %32, i32 %n.3.lcssa ; <i8*> [#uses=1]
+ store i8 39, i8* %47, align 1
+ %48 = add nsw i32 %n.3.lcssa, 1 ; <i32> [#uses=1]
+ br label %bb26
+
+bb26: ; preds = %bb25, %bb24
+ %n.5 = phi i32 [ %48, %bb25 ], [ %n.3.lcssa, %bb24 ] ; <i32> [#uses=1]
+ %49 = getelementptr inbounds i8* %32, i32 %n.5 ; <i8*> [#uses=1]
+ store i8 0, i8* %49, align 1
+ ret void
+
+return: ; preds = %bb1
+ ret void
+}
+
+declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*)
+
+declare void @exit(i32) noreturn nounwind
+
+declare i32 @__maskrune(i32, i32)
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
index 7313670..de930d5 100644
--- a/test/CodeGen/X86/20090313-signext.ll
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=pic > %t
+; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t
; RUN: grep {movswl %ax, %edi} %t
; RUN: grep {movw (%rax), %ax} %t
; XFAIL: *
diff --git a/test/CodeGen/X86/Atomics-32.ll b/test/CodeGen/X86/Atomics-32.ll
index 2a3e228..0e9b73e 100644
--- a/test/CodeGen/X86/Atomics-32.ll
+++ b/test/CodeGen/X86/Atomics-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
;; Note the 64-bit variants are not supported yet (in 32-bit mode).
; ModuleID = 'Atomics.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index 37b2e33..ac174b9 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; ModuleID = 'Atomics.c'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 513599c..a6fd2d8 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,186 +1,16 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small > %t
-; RUN: grep leal %t | count 33
-; RUN: grep movl %t | count 239
-; RUN: grep addl %t | count 20
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep %rip %t
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=pic -code-model=small > %t
-; RUN: grep leal %t | count 43
-; RUN: grep movl %t | count 377
-; RUN: grep addl %t | count 179
-; RUN: grep subl %t | count 6
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: grep _GLOBAL_OFFSET_TABLE_ %t | count 148
-; RUN: grep @GOT %t | count 207
-; RUN: grep @GOTOFF %t | count 58
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: grep @PLT %t | count 20
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 91
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 70
-; RUN: grep movq %t | count 56
-; RUN: grep addq %t | count 20
-; RUN: grep subq %t | count 14
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 139
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 98
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 59
-; RUN: grep movq %t | count 195
-; RUN: grep addq %t | count 36
-; RUN: grep subq %t | count 11
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 149
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 149
-; RUN: not grep @GOTPLT %t
-; RUN: grep @PLT %t | count 20
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 207
-
-
-
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small > %t
-; RUN: grep leal %t | count 33
-; RUN: grep movl %t | count 239
-; RUN: grep addl %t | count 20
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep %rip %t
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small > %t
-; RUN: grep leal %t | count 31
-; RUN: grep movl %t | count 312
-; RUN: grep addl %t | count 32
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small > %t
-; RUN: grep leal %t | count 57
-; RUN: grep movl %t | count 292
-; RUN: grep addl %t | count 32
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 95
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 89
-; RUN: grep movq %t | count 142
-; RUN: grep addq %t | count 30
-; RUN: grep subq %t | count 12
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 92
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 92
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 208
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 95
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 89
-; RUN: grep movq %t | count 142
-; RUN: grep addq %t | count 30
-; RUN: grep subq %t | count 12
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 92
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 92
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call \\\*} %t | count 10
-; RUN: grep {%rip} %t | count 208
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
@src = external global [131072 x i32]
@dst = external global [131072 x i32]
@@ -206,6 +36,71 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
ret void
+
+; LINUX-64-STATIC: foo00:
+; LINUX-64-STATIC: movl src(%rip), %eax
+; LINUX-64-STATIC: movl %eax, dst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo00:
+; LINUX-32-STATIC: movl src, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo00:
+; LINUX-32-PIC: movl src, %eax
+; LINUX-32-PIC-NEXT: movl %eax, dst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo00:
+; DARWIN-32-STATIC: movl _src, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo00:
+; DARWIN-32-PIC: call L1$pb
+; DARWIN-32-PIC-NEXT: L1$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L1$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L1$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @fxo00() nounwind {
@@ -213,18 +108,191 @@ entry:
%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
ret void
+
+; LINUX-64-STATIC: fxo00:
+; LINUX-64-STATIC: movl xsrc(%rip), %eax
+; LINUX-64-STATIC: movl %eax, xdst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo00:
+; LINUX-32-STATIC: movl xsrc, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, xdst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: fxo00:
+; LINUX-32-PIC: movl xsrc, %eax
+; LINUX-32-PIC-NEXT: movl %eax, xdst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: fxo00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _fxo00:
+; DARWIN-32-STATIC: movl _xsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _xdst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _fxo00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _fxo00:
+; DARWIN-32-PIC: call L2$pb
+; DARWIN-32-PIC-NEXT: L2$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L2$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L2$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _fxo00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _fxo00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _fxo00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo01() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: foo01:
+; LINUX-64-STATIC: movq $dst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo01:
+; LINUX-32-STATIC: movl $dst, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo01:
+; LINUX-32-PIC: movl $dst, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo01:
+; DARWIN-32-STATIC: movl $_dst, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo01:
+; DARWIN-32-PIC: call L3$pb
+; DARWIN-32-PIC-NEXT: L3$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L3$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @fxo01() nounwind {
entry:
store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: fxo01:
+; LINUX-64-STATIC: movq $xdst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo01:
+; LINUX-32-STATIC: movl $xdst, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: fxo01:
+; LINUX-32-PIC: movl $xdst, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: fxo01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _fxo01:
+; DARWIN-32-STATIC: movl $_xdst, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _fxo01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _fxo01:
+; DARWIN-32-PIC: call L4$pb
+; DARWIN-32-PIC-NEXT: L4$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L4$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _fxo01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _fxo01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _fxo01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo02() nounwind {
@@ -233,6 +301,80 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
ret void
+; LINUX-64-STATIC: foo02:
+; LINUX-64-STATIC: movl src(%rip), %
+; LINUX-64-STATIC: movq ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo02:
+; LINUX-32-STATIC: movl src, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo02:
+; LINUX-32-PIC: movl src, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo02:
+; DARWIN-32-STATIC: movl _src, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo02:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo02:
+; DARWIN-32-PIC: call L5$pb
+; DARWIN-32-PIC-NEXT: L5$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L5$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @fxo02() nounwind {
@@ -240,7 +382,81 @@ entry:
%0 = load i32** @ptr, align 8
%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
+; LINUX-64-STATIC: fxo02:
+; LINUX-64-STATIC: movl xsrc(%rip), %
+; LINUX-64-STATIC: movq ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo02:
+; LINUX-32-STATIC: movl xsrc, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: fxo02:
+; LINUX-32-PIC: movl xsrc, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: fxo02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _fxo02:
+; DARWIN-32-STATIC: movl _xsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _fxo02:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _fxo02:
+; DARWIN-32-PIC: call L6$pb
+; DARWIN-32-PIC-NEXT: L6$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L6$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L6$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _fxo02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _fxo02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _fxo02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo03() nounwind {
@@ -248,12 +464,114 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
ret void
+; LINUX-64-STATIC: foo03:
+; LINUX-64-STATIC: movl dsrc(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ddst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo03:
+; LINUX-32-STATIC: movl dsrc, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ddst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo03:
+; LINUX-32-PIC: movl dsrc, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ddst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo03:
+; DARWIN-32-STATIC: movl _dsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ddst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo03:
+; DARWIN-32-DYNAMIC: movl _dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ddst
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo03:
+; DARWIN-32-PIC: call L7$pb
+; DARWIN-32-PIC-NEXT: L7$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dsrc-L7$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _ddst-L7$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo03:
+; DARWIN-64-STATIC: movl _dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ddst(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo03:
+; DARWIN-64-DYNAMIC: movl _dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo03:
+; DARWIN-64-PIC: movl _dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ddst(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo04() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: foo04:
+; LINUX-64-STATIC: movq $ddst, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo04:
+; LINUX-32-STATIC: movl $ddst, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo04:
+; LINUX-32-PIC: movl $ddst, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo04:
+; DARWIN-32-STATIC: movl $_ddst, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo04:
+; DARWIN-32-DYNAMIC: movl $_ddst, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo04:
+; DARWIN-32-PIC: call L8$pb
+; DARWIN-32-PIC-NEXT: L8$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L8$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L8$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo05() nounwind {
@@ -262,6 +580,70 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
store i32 %1, i32* %0, align 4
ret void
+; LINUX-64-STATIC: foo05:
+; LINUX-64-STATIC: movl dsrc(%rip), %
+; LINUX-64-STATIC: movq dptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo05:
+; LINUX-32-STATIC: movl dsrc, %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo05:
+; LINUX-32-PIC: movl dsrc, %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo05:
+; DARWIN-32-STATIC: movl _dsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo05:
+; DARWIN-32-DYNAMIC: movl _dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo05:
+; DARWIN-32-PIC: call L9$pb
+; DARWIN-32-PIC-NEXT: L9$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dsrc-L9$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L9$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo05:
+; DARWIN-64-STATIC: movl _dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo05:
+; DARWIN-64-DYNAMIC: movl _dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo05:
+; DARWIN-64-PIC: movl _dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo06() nounwind {
@@ -269,12 +651,111 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
ret void
+; LINUX-64-STATIC: foo06:
+; LINUX-64-STATIC: movl lsrc(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ldst(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo06:
+; LINUX-32-STATIC: movl lsrc, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ldst
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo06:
+; LINUX-32-PIC: movl lsrc, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ldst
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo06:
+; LINUX-64-PIC: movl lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: movl %eax, ldst(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo06:
+; DARWIN-32-STATIC: movl _lsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ldst
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo06:
+; DARWIN-32-DYNAMIC: movl _lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ldst
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo06:
+; DARWIN-32-PIC: call L10$pb
+; DARWIN-32-PIC-NEXT: L10$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lsrc-L10$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _ldst-L10$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo06:
+; DARWIN-64-STATIC: movl _lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ldst(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo06:
+; DARWIN-64-DYNAMIC: movl _lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo06:
+; DARWIN-64-PIC: movl _lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ldst(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo07() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: foo07:
+; LINUX-64-STATIC: movq $ldst, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo07:
+; LINUX-32-STATIC: movl $ldst, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo07:
+; LINUX-32-PIC: movl $ldst, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo07:
+; DARWIN-32-STATIC: movl $_ldst, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo07:
+; DARWIN-32-DYNAMIC: movl $_ldst, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo07:
+; DARWIN-32-PIC: call L11$pb
+; DARWIN-32-PIC-NEXT: L11$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L11$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L11$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @foo08() nounwind {
@@ -283,6 +764,68 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
ret void
+; LINUX-64-STATIC: foo08:
+; LINUX-64-STATIC: movl lsrc(%rip), %
+; LINUX-64-STATIC: movq lptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo08:
+; LINUX-32-STATIC: movl lsrc, %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: foo08:
+; LINUX-32-PIC: movl lsrc, %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, (%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: foo08:
+; LINUX-64-PIC: movl lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _foo08:
+; DARWIN-32-STATIC: movl _lsrc, %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _foo08:
+; DARWIN-32-DYNAMIC: movl _lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _foo08:
+; DARWIN-32-PIC: call L12$pb
+; DARWIN-32-PIC-NEXT: L12$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lsrc-L12$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L12$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _foo08:
+; DARWIN-64-STATIC: movl _lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _foo08:
+; DARWIN-64-DYNAMIC: movl _lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _foo08:
+; DARWIN-64-PIC: movl _lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux00() nounwind {
@@ -290,6 +833,70 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
ret void
+; LINUX-64-STATIC: qux00:
+; LINUX-64-STATIC: movl src+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, dst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux00:
+; LINUX-32-STATIC: movl src+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux00:
+; LINUX-32-PIC: movl src+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, dst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux00:
+; DARWIN-32-STATIC: movl _src+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux00:
+; DARWIN-32-PIC: call L13$pb
+; DARWIN-32-PIC-NEXT: L13$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L13$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L13$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qxx00() nounwind {
@@ -297,18 +904,202 @@ entry:
%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
ret void
+; LINUX-64-STATIC: qxx00:
+; LINUX-64-STATIC: movl xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, xdst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx00:
+; LINUX-32-STATIC: movl xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, xdst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qxx00:
+; LINUX-32-PIC: movl xsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, xdst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qxx00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qxx00:
+; DARWIN-32-STATIC: movl _xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _xdst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qxx00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qxx00:
+; DARWIN-32-PIC: call L14$pb
+; DARWIN-32-PIC-NEXT: L14$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L14$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L14$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qxx00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qxx00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qxx00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux01() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: qux01:
+; LINUX-64-STATIC: movq $dst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux01:
+; LINUX-32-STATIC: movl $dst+64, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux01:
+; LINUX-32-PIC: movl $dst+64, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux01:
+; DARWIN-32-STATIC: movl $_dst+64, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux01:
+; DARWIN-32-PIC: call L15$pb
+; DARWIN-32-PIC-NEXT: L15$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L15$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: addl $64, %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qxx01() nounwind {
entry:
store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: qxx01:
+; LINUX-64-STATIC: movq $xdst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx01:
+; LINUX-32-STATIC: movl $xdst+64, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qxx01:
+; LINUX-32-PIC: movl $xdst+64, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qxx01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qxx01:
+; DARWIN-32-STATIC: movl $_xdst+64, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qxx01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qxx01:
+; DARWIN-32-PIC: call L16$pb
+; DARWIN-32-PIC-NEXT: L16$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L16$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: addl $64, %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qxx01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qxx01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qxx01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux02() nounwind {
@@ -317,7 +1108,81 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux02:
+; LINUX-64-STATIC: movl src+64(%rip), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux02:
+; LINUX-32-STATIC: movl src+64, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qux02:
+; LINUX-32-PIC: movl src+64, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux02:
+; DARWIN-32-STATIC: movl _src+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux02:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux02:
+; DARWIN-32-PIC: call L17$pb
+; DARWIN-32-PIC-NEXT: L17$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L17$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qxx02() nounwind {
@@ -326,7 +1191,81 @@ entry:
%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qxx02:
+; LINUX-64-STATIC: movl xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx02:
+; LINUX-32-STATIC: movl xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qxx02:
+; LINUX-32-PIC: movl xsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qxx02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qxx02:
+; DARWIN-32-STATIC: movl _xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qxx02:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qxx02:
+; DARWIN-32-PIC: call L18$pb
+; DARWIN-32-PIC-NEXT: L18$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L18$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L18$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qxx02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qxx02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qxx02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux03() nounwind {
@@ -334,12 +1273,115 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
ret void
+; LINUX-64-STATIC: qux03:
+; LINUX-64-STATIC: movl dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ddst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux03:
+; LINUX-32-STATIC: movl dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ddst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux03:
+; LINUX-32-PIC: movl dsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ddst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux03:
+; DARWIN-32-STATIC: movl _dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ddst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux03:
+; DARWIN-32-DYNAMIC: movl _dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ddst+64
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux03:
+; DARWIN-32-PIC: call L19$pb
+; DARWIN-32-PIC-NEXT: L19$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L19$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L19$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux03:
+; DARWIN-64-STATIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+64(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux03:
+; DARWIN-64-DYNAMIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux03:
+; DARWIN-64-PIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ddst+64(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux04() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: qux04:
+; LINUX-64-STATIC: movq $ddst+64, dptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux04:
+; LINUX-32-STATIC: movl $ddst+64, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux04:
+; LINUX-32-PIC: movl $ddst+64, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux04:
+; DARWIN-32-STATIC: movl $_ddst+64, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux04:
+; DARWIN-32-DYNAMIC: movl $_ddst+64, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux04:
+; DARWIN-32-PIC: call L20$pb
+; DARWIN-32-PIC-NEXT: L20$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L20$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L20$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux04:
+; DARWIN-64-STATIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux04:
+; DARWIN-64-DYNAMIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux04:
+; DARWIN-64-PIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux05() nounwind {
@@ -348,7 +1390,71 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux05:
+; LINUX-64-STATIC: movl dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux05:
+; LINUX-32-STATIC: movl dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qux05:
+; LINUX-32-PIC: movl dsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux05:
+; DARWIN-32-STATIC: movl _dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux05:
+; DARWIN-32-DYNAMIC: movl _dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux05:
+; DARWIN-32-PIC: call L21$pb
+; DARWIN-32-PIC-NEXT: L21$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L21$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L21$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux05:
+; DARWIN-64-STATIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux05:
+; DARWIN-64-DYNAMIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux05:
+; DARWIN-64-PIC: movl _dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux06() nounwind {
@@ -356,12 +1462,111 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
ret void
+; LINUX-64-STATIC: qux06:
+; LINUX-64-STATIC: movl lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ldst+64
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux06:
+; LINUX-32-STATIC: movl lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ldst+64
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux06:
+; LINUX-32-PIC: movl lsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ldst+64
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux06:
+; LINUX-64-PIC: movl lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: movl %eax, ldst+64(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux06:
+; DARWIN-32-STATIC: movl _lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ldst+64
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux06:
+; DARWIN-32-DYNAMIC: movl _lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ldst+64
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux06:
+; DARWIN-32-PIC: call L22$pb
+; DARWIN-32-PIC-NEXT: L22$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L22$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L22$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux06:
+; DARWIN-64-STATIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+64(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux06:
+; DARWIN-64-DYNAMIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux06:
+; DARWIN-64-PIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ldst+64(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux07() nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: qux07:
+; LINUX-64-STATIC: movq $ldst+64, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux07:
+; LINUX-32-STATIC: movl $ldst+64, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: qux07:
+; LINUX-32-PIC: movl $ldst+64, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux07:
+; LINUX-64-PIC: leaq ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux07:
+; DARWIN-32-STATIC: movl $_ldst+64, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux07:
+; DARWIN-32-DYNAMIC: movl $_ldst+64, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux07:
+; DARWIN-32-PIC: call L23$pb
+; DARWIN-32-PIC-NEXT: L23$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L23$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L23$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux07:
+; DARWIN-64-STATIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux07:
+; DARWIN-64-DYNAMIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux07:
+; DARWIN-64-PIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @qux08() nounwind {
@@ -370,7 +1575,69 @@ entry:
%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
%2 = getelementptr i32* %0, i64 16
store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux08:
+; LINUX-64-STATIC: movl lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux08:
+; LINUX-32-STATIC: movl lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: ret
ret void
+
+; LINUX-32-PIC: qux08:
+; LINUX-32-PIC: movl lsrc+64, %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: qux08:
+; LINUX-64-PIC: movl lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _qux08:
+; DARWIN-32-STATIC: movl _lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _qux08:
+; DARWIN-32-DYNAMIC: movl _lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _qux08:
+; DARWIN-32-PIC: call L24$pb
+; DARWIN-32-PIC-NEXT: L24$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L24$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L24$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _qux08:
+; DARWIN-64-STATIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _qux08:
+; DARWIN-64-DYNAMIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _qux08:
+; DARWIN-64-PIC: movl _lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind00(i64 %i) nounwind {
@@ -380,6 +1647,75 @@ entry:
%2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ind00:
+; LINUX-64-STATIC: movl src(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, dst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, dst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, dst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _dst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind00:
+; DARWIN-32-PIC: call L25$pb
+; DARWIN-32-PIC-NEXT: L25$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L25$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L25$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ixd00(i64 %i) nounwind {
@@ -389,6 +1725,75 @@ entry:
%2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ixd00:
+; LINUX-64-STATIC: movl xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, xdst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, xdst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ixd00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, xdst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ixd00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ixd00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _xdst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ixd00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ixd00:
+; DARWIN-32-PIC: call L26$pb
+; DARWIN-32-PIC-NEXT: L26$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L26$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L26$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ixd00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ixd00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ixd00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind01(i64 %i) nounwind {
@@ -396,6 +1801,75 @@ entry:
%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: ind01:
+; LINUX-64-STATIC: leaq dst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind01:
+; LINUX-64-PIC: shlq $2, %rdi
+; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rdi, (%rax)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: shll $2, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind01:
+; DARWIN-32-PIC: call L27$pb
+; DARWIN-32-PIC-NEXT: L27$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: shll $2, %ecx
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L27$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind01:
+; DARWIN-64-STATIC: shlq $2, %rdi
+; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind01:
+; DARWIN-64-DYNAMIC: shlq $2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind01:
+; DARWIN-64-PIC: shlq $2, %rdi
+; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ixd01(i64 %i) nounwind {
@@ -403,6 +1877,75 @@ entry:
%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: ixd01:
+; LINUX-64-STATIC: leaq xdst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ixd01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ixd01:
+; LINUX-64-PIC: shlq $2, %rdi
+; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rdi, (%rax)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ixd01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ixd01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: shll $2, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ixd01:
+; DARWIN-32-PIC: call L28$pb
+; DARWIN-32-PIC-NEXT: L28$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: shll $2, %ecx
+; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L28$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ixd01:
+; DARWIN-64-STATIC: shlq $2, %rdi
+; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ixd01:
+; DARWIN-64-DYNAMIC: shlq $2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ixd01:
+; DARWIN-64-PIC: shlq $2, %rdi
+; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rdi, (%rax)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind02(i64 %i) nounwind {
@@ -413,6 +1956,85 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ind02:
+; LINUX-64-STATIC: movl src(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind02:
+; DARWIN-32-PIC: call L29$pb
+; DARWIN-32-PIC-NEXT: L29$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L29$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ixd02(i64 %i) nounwind {
@@ -423,6 +2045,85 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ixd02:
+; LINUX-64-STATIC: movl xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ixd02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ixd02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ixd02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ixd02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ixd02:
+; DARWIN-32-PIC: call L30$pb
+; DARWIN-32-PIC-NEXT: L30$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L30$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl (%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L30$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ixd02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ixd02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ixd02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind03(i64 %i) nounwind {
@@ -432,6 +2133,71 @@ entry:
%2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ind03:
+; LINUX-64-STATIC: movl dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ddst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ddst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ddst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ddst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ddst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind03:
+; DARWIN-32-PIC: call L31$pb
+; DARWIN-32-PIC-NEXT: L31$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dsrc-L31$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, _ddst-L31$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind04(i64 %i) nounwind {
@@ -439,6 +2205,68 @@ entry:
%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
store i32* %0, i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: ind04:
+; LINUX-64-STATIC: leaq ddst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind04:
+; LINUX-64-PIC: shlq $2, %rdi
+; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rdi, (%rax)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind04:
+; DARWIN-32-PIC: call L32$pb
+; DARWIN-32-PIC-NEXT: L32$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal _ddst-L32$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L32$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind05(i64 %i) nounwind {
@@ -449,6 +2277,78 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ind05:
+; LINUX-64-STATIC: movl dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl dptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl dptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _dptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind05:
+; DARWIN-32-PIC: call L33$pb
+; DARWIN-32-PIC-NEXT: L33$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dsrc-L33$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L33$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind05:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind05:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind05:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind06(i64 %i) nounwind {
@@ -458,6 +2358,71 @@ entry:
%2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: ind06:
+; LINUX-64-STATIC: movl lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ldst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ldst(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ldst(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ldst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ldst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind06:
+; DARWIN-32-PIC: call L34$pb
+; DARWIN-32-PIC-NEXT: L34$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lsrc-L34$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, _ldst-L34$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind07(i64 %i) nounwind {
@@ -465,6 +2430,67 @@ entry:
%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
store i32* %0, i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: ind07:
+; LINUX-64-STATIC: leaq ldst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind07:
+; DARWIN-32-PIC: call L35$pb
+; DARWIN-32-PIC-NEXT: L35$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal _ldst-L35$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L35$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @ind08(i64 %i) nounwind {
@@ -475,6 +2501,77 @@ entry:
%3 = getelementptr i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: ind08:
+; LINUX-64-STATIC: movl lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl lptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ind08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl lptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ind08:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ind08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _lptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ind08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ind08:
+; DARWIN-32-PIC: call L36$pb
+; DARWIN-32-PIC-NEXT: L36$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lsrc-L36$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L36$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ind08:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ind08:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ind08:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off00(i64 %i) nounwind {
@@ -485,6 +2582,75 @@ entry:
%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: off00:
+; LINUX-64-STATIC: movl src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, dst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, dst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, dst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _dst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off00:
+; DARWIN-32-PIC: call L37$pb
+; DARWIN-32-PIC-NEXT: L37$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L37$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L37$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @oxf00(i64 %i) nounwind {
@@ -495,6 +2661,75 @@ entry:
%3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: oxf00:
+; LINUX-64-STATIC: movl xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, xdst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, xdst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: oxf00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, xdst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: oxf00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _oxf00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _xdst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _oxf00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _oxf00:
+; DARWIN-32-PIC: call L38$pb
+; DARWIN-32-PIC-NEXT: L38$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L38$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L38$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _oxf00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _oxf00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _oxf00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off01(i64 %i) nounwind {
@@ -503,6 +2738,75 @@ entry:
%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: off01:
+; LINUX-64-STATIC: leaq dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off01:
+; DARWIN-32-PIC: call L39$pb
+; DARWIN-32-PIC-NEXT: L39$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L39$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: leal 64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @oxf01(i64 %i) nounwind {
@@ -511,6 +2815,75 @@ entry:
%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: oxf01:
+; LINUX-64-STATIC: leaq xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: oxf01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: oxf01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _oxf01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _oxf01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _oxf01:
+; DARWIN-32-PIC: call L40$pb
+; DARWIN-32-PIC-NEXT: L40$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L40$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: leal 64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _oxf01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _oxf01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _oxf01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off02(i64 %i) nounwind {
@@ -522,6 +2895,85 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: off02:
+; LINUX-64-STATIC: movl src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off02:
+; DARWIN-32-PIC: call L41$pb
+; DARWIN-32-PIC-NEXT: L41$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L41$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @oxf02(i64 %i) nounwind {
@@ -533,6 +2985,85 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: oxf02:
+; LINUX-64-STATIC: movl xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: oxf02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: oxf02:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _oxf02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _oxf02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _oxf02:
+; DARWIN-32-PIC: call L42$pb
+; DARWIN-32-PIC-NEXT: L42$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L42$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L42$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _oxf02:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _oxf02:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _oxf02:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off03(i64 %i) nounwind {
@@ -543,6 +3074,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: off03:
+; LINUX-64-STATIC: movl dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ddst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ddst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ddst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ddst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ddst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off03:
+; DARWIN-32-PIC: call L43$pb
+; DARWIN-32-PIC-NEXT: L43$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L43$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L43$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off04(i64 %i) nounwind {
@@ -551,6 +3147,68 @@ entry:
%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
store i32* %0, i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: off04:
+; LINUX-64-STATIC: leaq ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off04:
+; DARWIN-32-PIC: call L44$pb
+; DARWIN-32-PIC-NEXT: L44$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L44$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L44$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off05(i64 %i) nounwind {
@@ -562,6 +3220,78 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: off05:
+; LINUX-64-STATIC: movl dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl dptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl dptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _dptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off05:
+; DARWIN-32-PIC: call L45$pb
+; DARWIN-32-PIC-NEXT: L45$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L45$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L45$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off05:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off05:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off05:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off06(i64 %i) nounwind {
@@ -572,6 +3302,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: off06:
+; LINUX-64-STATIC: movl lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ldst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ldst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ldst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ldst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ldst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off06:
+; DARWIN-32-PIC: call L46$pb
+; DARWIN-32-PIC-NEXT: L46$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L46$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L46$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off07(i64 %i) nounwind {
@@ -580,6 +3375,67 @@ entry:
%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
store i32* %0, i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: off07:
+; LINUX-64-STATIC: leaq ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off07:
+; DARWIN-32-PIC: call L47$pb
+; DARWIN-32-PIC-NEXT: L47$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L47$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L47$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @off08(i64 %i) nounwind {
@@ -591,6 +3447,77 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: off08:
+; LINUX-64-STATIC: movl lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl lptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: off08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl lptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: off08:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _off08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _lptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _off08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _off08:
+; DARWIN-32-PIC: call L48$pb
+; DARWIN-32-PIC-NEXT: L48$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L48$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L48$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _off08:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _off08:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _off08:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo00(i64 %i) nounwind {
@@ -598,12 +3525,136 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
ret void
+; LINUX-64-STATIC: moo00:
+; LINUX-64-STATIC: movl src+262144(%rip), %eax
+; LINUX-64-STATIC: movl %eax, dst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo00:
+; LINUX-32-STATIC: movl src+262144, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dst+262144
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo00:
+; LINUX-32-PIC: movl src+262144, %eax
+; LINUX-32-PIC-NEXT: movl %eax, dst+262144
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo00:
+; DARWIN-32-STATIC: movl _src+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dst+262144
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo00:
+; DARWIN-32-PIC: call L49$pb
+; DARWIN-32-PIC-NEXT: L49$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L49$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L49$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo01(i64 %i) nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: moo01:
+; LINUX-64-STATIC: movq $dst+262144, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo01:
+; LINUX-32-STATIC: movl $dst+262144, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo01:
+; LINUX-32-PIC: movl $dst+262144, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo01:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo01:
+; DARWIN-32-STATIC: movl $_dst+262144, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo01:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo01:
+; DARWIN-32-PIC: call L50$pb
+; DARWIN-32-PIC-NEXT: L50$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl $262144, %ecx
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L50$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo01:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo01:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo01:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo02(i64 %i) nounwind {
@@ -613,6 +3664,80 @@ entry:
%2 = getelementptr i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: moo02:
+; LINUX-64-STATIC: movl src+262144(%rip), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo02:
+; LINUX-32-STATIC: movl src+262144, %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo02:
+; LINUX-32-PIC: movl src+262144, %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo02:
+; DARWIN-32-STATIC: movl _src+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo02:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl (%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo02:
+; DARWIN-32-PIC: call L51$pb
+; DARWIN-32-PIC-NEXT: L51$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L51$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl 262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L51$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo03(i64 %i) nounwind {
@@ -620,12 +3745,115 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
ret void
+; LINUX-64-STATIC: moo03:
+; LINUX-64-STATIC: movl dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ddst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo03:
+; LINUX-32-STATIC: movl dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ddst+262144
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo03:
+; LINUX-32-PIC: movl dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ddst+262144
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo03:
+; DARWIN-32-STATIC: movl _dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ddst+262144
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo03:
+; DARWIN-32-DYNAMIC: movl _dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ddst+262144
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo03:
+; DARWIN-32-PIC: call L52$pb
+; DARWIN-32-PIC-NEXT: L52$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L52$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L52$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo03:
+; DARWIN-64-STATIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo03:
+; DARWIN-64-DYNAMIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo03:
+; DARWIN-64-PIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ddst+262144(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo04(i64 %i) nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: moo04:
+; LINUX-64-STATIC: movq $ddst+262144, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo04:
+; LINUX-32-STATIC: movl $ddst+262144, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo04:
+; LINUX-32-PIC: movl $ddst+262144, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo04:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo04:
+; DARWIN-32-STATIC: movl $_ddst+262144, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo04:
+; DARWIN-32-DYNAMIC: movl $_ddst+262144, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo04:
+; DARWIN-32-PIC: call L53$pb
+; DARWIN-32-PIC-NEXT: L53$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L53$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L53$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo04:
+; DARWIN-64-STATIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo04:
+; DARWIN-64-DYNAMIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo04:
+; DARWIN-64-PIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo05(i64 %i) nounwind {
@@ -635,6 +3863,70 @@ entry:
%2 = getelementptr i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: moo05:
+; LINUX-64-STATIC: movl dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo05:
+; LINUX-32-STATIC: movl dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo05:
+; LINUX-32-PIC: movl dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo05:
+; DARWIN-32-STATIC: movl _dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo05:
+; DARWIN-32-DYNAMIC: movl _dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo05:
+; DARWIN-32-PIC: call L54$pb
+; DARWIN-32-PIC-NEXT: L54$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L54$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L54$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo05:
+; DARWIN-64-STATIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo05:
+; DARWIN-64-DYNAMIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo05:
+; DARWIN-64-PIC: movl _dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo06(i64 %i) nounwind {
@@ -642,12 +3934,111 @@ entry:
%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
ret void
+; LINUX-64-STATIC: moo06:
+; LINUX-64-STATIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl %eax, ldst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo06:
+; LINUX-32-STATIC: movl lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ldst+262144
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo06:
+; LINUX-32-PIC: movl lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl %eax, ldst+262144
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo06:
+; LINUX-64-PIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: movl %eax, ldst+262144(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo06:
+; DARWIN-32-STATIC: movl _lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ldst+262144
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo06:
+; DARWIN-32-DYNAMIC: movl _lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _ldst+262144
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo06:
+; DARWIN-32-PIC: call L55$pb
+; DARWIN-32-PIC-NEXT: L55$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L55$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L55$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo06:
+; DARWIN-64-STATIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo06:
+; DARWIN-64-DYNAMIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo06:
+; DARWIN-64-PIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movl %eax, _ldst+262144(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo07(i64 %i) nounwind {
entry:
store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: moo07:
+; LINUX-64-STATIC: movq $ldst+262144, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo07:
+; LINUX-32-STATIC: movl $ldst+262144, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo07:
+; LINUX-32-PIC: movl $ldst+262144, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo07:
+; LINUX-64-PIC: leaq ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo07:
+; DARWIN-32-STATIC: movl $_ldst+262144, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo07:
+; DARWIN-32-DYNAMIC: movl $_ldst+262144, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo07:
+; DARWIN-32-PIC: call L56$pb
+; DARWIN-32-PIC-NEXT: L56$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L56$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L56$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo07:
+; DARWIN-64-STATIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo07:
+; DARWIN-64-DYNAMIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo07:
+; DARWIN-64-PIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @moo08(i64 %i) nounwind {
@@ -657,6 +4048,68 @@ entry:
%2 = getelementptr i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
+; LINUX-64-STATIC: moo08:
+; LINUX-64-STATIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo08:
+; LINUX-32-STATIC: movl lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: moo08:
+; LINUX-32-PIC: movl lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: movl %eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: moo08:
+; LINUX-64-PIC: movl lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _moo08:
+; DARWIN-32-STATIC: movl _lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _moo08:
+; DARWIN-32-DYNAMIC: movl _lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _moo08:
+; DARWIN-32-PIC: call L57$pb
+; DARWIN-32-PIC-NEXT: L57$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L57$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L57$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _moo08:
+; DARWIN-64-STATIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _moo08:
+; DARWIN-64-DYNAMIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _moo08:
+; DARWIN-64-PIC: movl _lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big00(i64 %i) nounwind {
@@ -667,6 +4120,75 @@ entry:
%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: big00:
+; LINUX-64-STATIC: movl src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, dst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, dst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, dst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _dst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big00:
+; DARWIN-32-PIC: call L58$pb
+; DARWIN-32-PIC-NEXT: L58$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L58$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L58$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big01(i64 %i) nounwind {
@@ -675,6 +4197,75 @@ entry:
%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
+; LINUX-64-STATIC: big01:
+; LINUX-64-STATIC: leaq dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, ptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, ptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _ptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big01:
+; DARWIN-32-PIC: call L59$pb
+; DARWIN-32-PIC-NEXT: L59$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L59$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: leal 262144(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %ecx, (%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq %rax, (%rcx)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big02(i64 %i) nounwind {
@@ -686,6 +4277,85 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: big02:
+; LINUX-64-STATIC: movl src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq ptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl ptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl ptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big02:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _ptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big02:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl 262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl (%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big02:
+; DARWIN-32-PIC: call L60$pb
+; DARWIN-32-PIC-NEXT: L60$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L60$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: movl 262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L60$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big02:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big02:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big02:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big03(i64 %i) nounwind {
@@ -696,6 +4366,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: big03:
+; LINUX-64-STATIC: movl dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ddst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ddst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ddst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big03:
+; DARWIN-32-PIC: call L61$pb
+; DARWIN-32-PIC-NEXT: L61$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L61$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L61$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big04(i64 %i) nounwind {
@@ -704,6 +4439,68 @@ entry:
%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
store i32* %0, i32** @dptr, align 8
ret void
+; LINUX-64-STATIC: big04:
+; LINUX-64-STATIC: leaq ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, dptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, dptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq %rax, (%rcx)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _dptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big04:
+; DARWIN-32-PIC: call L62$pb
+; DARWIN-32-PIC-NEXT: L62$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L62$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L62$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big05(i64 %i) nounwind {
@@ -715,6 +4512,78 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: big05:
+; LINUX-64-STATIC: movl dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq dptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl dptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl dptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big05:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movq (%rcx), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _dptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big05:
+; DARWIN-32-PIC: call L63$pb
+; DARWIN-32-PIC-NEXT: L63$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_dsrc-L63$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _dptr-L63$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big05:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big05:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big05:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big06(i64 %i) nounwind {
@@ -725,6 +4594,71 @@ entry:
%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
+; LINUX-64-STATIC: big06:
+; LINUX-64-STATIC: movl lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl %eax, ldst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl %ecx, ldst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl %ecx, ldst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl %ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big06:
+; DARWIN-32-PIC: call L64$pb
+; DARWIN-32-PIC-NEXT: L64$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L64$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L64$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big07(i64 %i) nounwind {
@@ -733,6 +4667,67 @@ entry:
%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
store i32* %0, i32** @lptr, align 8
ret void
+; LINUX-64-STATIC: big07:
+; LINUX-64-STATIC: leaq ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: movl %eax, lptr
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: movl %eax, lptr
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: movl %eax, _lptr
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl %eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big07:
+; DARWIN-32-PIC: call L65$pb
+; DARWIN-32-PIC-NEXT: L65$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L65$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L65$pb(%eax)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: ret
}
define void @big08(i64 %i) nounwind {
@@ -744,81 +4739,782 @@ entry:
%4 = getelementptr i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
+; LINUX-64-STATIC: big08:
+; LINUX-64-STATIC: movl lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq lptr(%rip), %rcx
+; LINUX-64-STATIC: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: movl lptr, %edx
+; LINUX-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: big08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: movl lptr, %edx
+; LINUX-32-PIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: big08:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _big08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: movl _lptr, %edx
+; DARWIN-32-STATIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _big08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: movl %ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _big08:
+; DARWIN-32-PIC: call L66$pb
+; DARWIN-32-PIC-NEXT: L66$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl (_lsrc-L66$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: movl _lptr-L66$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _big08:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _big08:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _big08:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: ret
}
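; The bar00-bar08 functions below simply return the address of each global as
; an i8*, so the checks cover how a bare symbol reference is materialized under
; each model: an absolute immediate (movl $sym), a GOT load (sym@GOTPCREL), a
; RIP-relative lea, or a Darwin non-lazy-pointer / picbase sequence. The
; bxr00/bxr01 variants do the same with the 32-element xsrc/xdst arrays.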
define i8* @bar00() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: bar00:
+; LINUX-64-STATIC: movl $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar00:
+; LINUX-32-STATIC: movl $src, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar00:
+; LINUX-32-PIC: movl $src, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar00:
+; DARWIN-32-STATIC: movl $_src, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar00:
+; DARWIN-32-PIC: call L67$pb
+; DARWIN-32-PIC-NEXT: L67$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L67$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxr00() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: bxr00:
+; LINUX-64-STATIC: movl $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr00:
+; LINUX-32-STATIC: movl $xsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxr00:
+; LINUX-32-PIC: movl $xsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxr00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxr00:
+; DARWIN-32-STATIC: movl $_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxr00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxr00:
+; DARWIN-32-PIC: call L68$pb
+; DARWIN-32-PIC-NEXT: L68$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L68$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxr00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxr00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxr00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar01() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: bar01:
+; LINUX-64-STATIC: movl $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar01:
+; LINUX-32-STATIC: movl $dst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar01:
+; LINUX-32-PIC: movl $dst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar01:
+; DARWIN-32-STATIC: movl $_dst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar01:
+; DARWIN-32-PIC: call L69$pb
+; DARWIN-32-PIC-NEXT: L69$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L69$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxr01() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: bxr01:
+; LINUX-64-STATIC: movl $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr01:
+; LINUX-32-STATIC: movl $xdst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxr01:
+; LINUX-32-PIC: movl $xdst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxr01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxr01:
+; DARWIN-32-STATIC: movl $_xdst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxr01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxr01:
+; DARWIN-32-PIC: call L70$pb
+; DARWIN-32-PIC-NEXT: L70$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L70$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxr01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxr01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxr01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar02() nounwind {
entry:
ret i8* bitcast (i32** @ptr to i8*)
+; LINUX-64-STATIC: bar02:
+; LINUX-64-STATIC: movl $ptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar02:
+; LINUX-32-STATIC: movl $ptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar02:
+; LINUX-32-PIC: movl $ptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar02:
+; DARWIN-32-STATIC: movl $_ptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar02:
+; DARWIN-32-PIC: call L71$pb
+; DARWIN-32-PIC-NEXT: L71$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L71$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar03() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: bar03:
+; LINUX-64-STATIC: movl $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar03:
+; LINUX-32-STATIC: movl $dsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar03:
+; LINUX-32-PIC: movl $dsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar03:
+; DARWIN-32-STATIC: movl $_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar03:
+; DARWIN-32-DYNAMIC: movl $_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar03:
+; DARWIN-32-PIC: call L72$pb
+; DARWIN-32-PIC-NEXT: L72$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dsrc-L72$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar04() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: bar04:
+; LINUX-64-STATIC: movl $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar04:
+; LINUX-32-STATIC: movl $ddst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar04:
+; LINUX-32-PIC: movl $ddst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar04:
+; DARWIN-32-STATIC: movl $_ddst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar04:
+; DARWIN-32-DYNAMIC: movl $_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar04:
+; DARWIN-32-PIC: call L73$pb
+; DARWIN-32-PIC-NEXT: L73$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L73$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar05() nounwind {
entry:
ret i8* bitcast (i32** @dptr to i8*)
+; LINUX-64-STATIC: bar05:
+; LINUX-64-STATIC: movl $dptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar05:
+; LINUX-32-STATIC: movl $dptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar05:
+; LINUX-32-PIC: movl $dptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar05:
+; DARWIN-32-STATIC: movl $_dptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar05:
+; DARWIN-32-DYNAMIC: movl $_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar05:
+; DARWIN-32-PIC: call L74$pb
+; DARWIN-32-PIC-NEXT: L74$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dptr-L74$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar05:
+; DARWIN-64-STATIC: leaq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar05:
+; DARWIN-64-DYNAMIC: leaq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar05:
+; DARWIN-64-PIC: leaq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar06() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: bar06:
+; LINUX-64-STATIC: movl $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar06:
+; LINUX-32-STATIC: movl $lsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar06:
+; LINUX-32-PIC: movl $lsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar06:
+; DARWIN-32-STATIC: movl $_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar06:
+; DARWIN-32-DYNAMIC: movl $_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar06:
+; DARWIN-32-PIC: call L75$pb
+; DARWIN-32-PIC-NEXT: L75$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lsrc-L75$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar07() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: bar07:
+; LINUX-64-STATIC: movl $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar07:
+; LINUX-32-STATIC: movl $ldst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar07:
+; LINUX-32-PIC: movl $ldst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar07:
+; DARWIN-32-STATIC: movl $_ldst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar07:
+; DARWIN-32-DYNAMIC: movl $_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar07:
+; DARWIN-32-PIC: call L76$pb
+; DARWIN-32-PIC-NEXT: L76$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L76$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bar08() nounwind {
entry:
ret i8* bitcast (i32** @lptr to i8*)
+; LINUX-64-STATIC: bar08:
+; LINUX-64-STATIC: movl $lptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar08:
+; LINUX-32-STATIC: movl $lptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bar08:
+; LINUX-32-PIC: movl $lptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bar08:
+; LINUX-64-PIC: leaq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bar08:
+; DARWIN-32-STATIC: movl $_lptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bar08:
+; DARWIN-32-DYNAMIC: movl $_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bar08:
+; DARWIN-32-PIC: call L77$pb
+; DARWIN-32-PIC-NEXT: L77$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lptr-L77$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bar08:
+; DARWIN-64-STATIC: leaq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bar08:
+; DARWIN-64-DYNAMIC: leaq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bar08:
+; DARWIN-64-PIC: leaq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
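; The har00-har08 functions mirror the bar tests, except that for the pointer
; globals (ptr, dptr, lptr) they return the loaded pointer value rather than
; the address of the variable, adding an extra load through the GOT or
; non-lazy pointer where required.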
define i8* @har00() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: har00:
+; LINUX-64-STATIC: movl $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har00:
+; LINUX-32-STATIC: movl $src, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har00:
+; LINUX-32-PIC: movl $src, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har00:
+; DARWIN-32-STATIC: movl $_src, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har00:
+; DARWIN-32-PIC: call L78$pb
+; DARWIN-32-PIC-NEXT: L78$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L78$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @hxr00() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: hxr00:
+; LINUX-64-STATIC: movl $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr00:
+; LINUX-32-STATIC: movl $xsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: hxr00:
+; LINUX-32-PIC: movl $xsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: hxr00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _hxr00:
+; DARWIN-32-STATIC: movl $_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _hxr00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _hxr00:
+; DARWIN-32-PIC: call L79$pb
+; DARWIN-32-PIC-NEXT: L79$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L79$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _hxr00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _hxr00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _hxr00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har01() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: har01:
+; LINUX-64-STATIC: movl $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har01:
+; LINUX-32-STATIC: movl $dst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har01:
+; LINUX-32-PIC: movl $dst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har01:
+; DARWIN-32-STATIC: movl $_dst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har01:
+; DARWIN-32-PIC: call L80$pb
+; DARWIN-32-PIC-NEXT: L80$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L80$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @hxr01() nounwind {
entry:
ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: hxr01:
+; LINUX-64-STATIC: movl $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr01:
+; LINUX-32-STATIC: movl $xdst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: hxr01:
+; LINUX-32-PIC: movl $xdst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: hxr01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _hxr01:
+; DARWIN-32-STATIC: movl $_xdst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _hxr01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _hxr01:
+; DARWIN-32-PIC: call L81$pb
+; DARWIN-32-PIC-NEXT: L81$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L81$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _hxr01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _hxr01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _hxr01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har02() nounwind {
@@ -826,16 +5522,148 @@ entry:
%0 = load i32** @ptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
+; LINUX-64-STATIC: har02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har02:
+; LINUX-32-STATIC: movl ptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har02:
+; LINUX-32-PIC: movl ptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har02:
+; DARWIN-32-STATIC: movl _ptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har02:
+; DARWIN-32-PIC: call L82$pb
+; DARWIN-32-PIC-NEXT: L82$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L82$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har03() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: har03:
+; LINUX-64-STATIC: movl $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har03:
+; LINUX-32-STATIC: movl $dsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har03:
+; LINUX-32-PIC: movl $dsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har03:
+; DARWIN-32-STATIC: movl $_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har03:
+; DARWIN-32-DYNAMIC: movl $_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har03:
+; DARWIN-32-PIC: call L83$pb
+; DARWIN-32-PIC-NEXT: L83$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dsrc-L83$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har04() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: har04:
+; LINUX-64-STATIC: movl $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har04:
+; LINUX-32-STATIC: movl $ddst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har04:
+; LINUX-32-PIC: movl $ddst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har04:
+; DARWIN-32-STATIC: movl $_ddst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har04:
+; DARWIN-32-DYNAMIC: movl $_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har04:
+; DARWIN-32-PIC: call L84$pb
+; DARWIN-32-PIC-NEXT: L84$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ddst-L84$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har05() nounwind {
@@ -843,16 +5671,143 @@ entry:
%0 = load i32** @dptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
+; LINUX-64-STATIC: har05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har05:
+; LINUX-32-STATIC: movl dptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har05:
+; LINUX-32-PIC: movl dptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har05:
+; DARWIN-32-STATIC: movl _dptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har05:
+; DARWIN-32-DYNAMIC: movl _dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har05:
+; DARWIN-32-PIC: call L85$pb
+; DARWIN-32-PIC-NEXT: L85$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L85$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har06() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: har06:
+; LINUX-64-STATIC: movl $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har06:
+; LINUX-32-STATIC: movl $lsrc, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har06:
+; LINUX-32-PIC: movl $lsrc, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har06:
+; DARWIN-32-STATIC: movl $_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har06:
+; DARWIN-32-DYNAMIC: movl $_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har06:
+; DARWIN-32-PIC: call L86$pb
+; DARWIN-32-PIC-NEXT: L86$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lsrc-L86$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har07() nounwind {
entry:
ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: har07:
+; LINUX-64-STATIC: movl $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har07:
+; LINUX-32-STATIC: movl $ldst, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har07:
+; LINUX-32-PIC: movl $ldst, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har07:
+; DARWIN-32-STATIC: movl $_ldst, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har07:
+; DARWIN-32-DYNAMIC: movl $_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har07:
+; DARWIN-32-PIC: call L87$pb
+; DARWIN-32-PIC-NEXT: L87$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _ldst-L87$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @har08() nounwind {
@@ -860,26 +5815,260 @@ entry:
%0 = load i32** @lptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
+; LINUX-64-STATIC: har08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har08:
+; LINUX-32-STATIC: movl lptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: har08:
+; LINUX-32-PIC: movl lptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: har08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _har08:
+; DARWIN-32-STATIC: movl _lptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _har08:
+; DARWIN-32-DYNAMIC: movl _lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _har08:
+; DARWIN-32-PIC: call L88$pb
+; DARWIN-32-PIC-NEXT: L88$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L88$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _har08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _har08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _har08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
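; The bat00-bat08 functions return the address of element 16 of each global
; (a +64 byte offset), checking whether the offset is folded into the symbol
; reference ($sym+64, sym+64(%rip)) or applied as a separate add after the
; address is loaded from the GOT or a non-lazy pointer.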
define i8* @bat00() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat00:
+; LINUX-64-STATIC: movl $src+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat00:
+; LINUX-32-STATIC: movl $src+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat00:
+; LINUX-32-PIC: movl $src+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat00:
+; DARWIN-32-STATIC: movl $_src+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat00:
+; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat00:
+; DARWIN-32-PIC: call L89$pb
+; DARWIN-32-PIC-NEXT: L89$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L89$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxt00() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt00:
+; LINUX-64-STATIC: movl $xsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt00:
+; LINUX-32-STATIC: movl $xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxt00:
+; LINUX-32-PIC: movl $xsrc+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxt00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxt00:
+; DARWIN-32-STATIC: movl $_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxt00:
+; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxt00:
+; DARWIN-32-PIC: call L90$pb
+; DARWIN-32-PIC-NEXT: L90$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L90$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxt00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxt00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxt00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat01:
+; LINUX-64-STATIC: movl $dst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat01:
+; LINUX-32-STATIC: movl $dst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat01:
+; LINUX-32-PIC: movl $dst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat01:
+; DARWIN-32-STATIC: movl $_dst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat01:
+; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat01:
+; DARWIN-32-PIC: call L91$pb
+; DARWIN-32-PIC-NEXT: L91$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L91$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxt01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt01:
+; LINUX-64-STATIC: movl $xdst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt01:
+; LINUX-32-STATIC: movl $xdst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxt01:
+; LINUX-32-PIC: movl $xdst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxt01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxt01:
+; DARWIN-32-STATIC: movl $_xdst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxt01:
+; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxt01:
+; DARWIN-32-PIC: call L92$pb
+; DARWIN-32-PIC-NEXT: L92$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L92$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxt01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxt01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxt01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat02() nounwind {
@@ -888,16 +6077,160 @@ entry:
%1 = getelementptr i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bat02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: addq $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat02:
+; LINUX-32-STATIC: movl ptr, %eax
+; LINUX-32-STATIC-NEXT: addl $64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat02:
+; LINUX-32-PIC: movl ptr, %eax
+; LINUX-32-PIC-NEXT: addl $64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat02:
+; DARWIN-32-STATIC: movl _ptr, %eax
+; DARWIN-32-STATIC-NEXT: addl $64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat02:
+; DARWIN-32-PIC: call L93$pb
+; DARWIN-32-PIC-NEXT: L93$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L93$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat03() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat03:
+; LINUX-64-STATIC: movl $dsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat03:
+; LINUX-32-STATIC: movl $dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat03:
+; LINUX-32-PIC: movl $dsrc+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat03:
+; DARWIN-32-STATIC: movl $_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat03:
+; DARWIN-32-DYNAMIC: movl $_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat03:
+; DARWIN-32-PIC: call L94$pb
+; DARWIN-32-PIC-NEXT: L94$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L94$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat03:
+; DARWIN-64-STATIC: leaq _dsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat03:
+; DARWIN-64-DYNAMIC: leaq _dsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat03:
+; DARWIN-64-PIC: leaq _dsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat04() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat04:
+; LINUX-64-STATIC: movl $ddst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat04:
+; LINUX-32-STATIC: movl $ddst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat04:
+; LINUX-32-PIC: movl $ddst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat04:
+; DARWIN-32-STATIC: movl $_ddst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat04:
+; DARWIN-32-DYNAMIC: movl $_ddst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat04:
+; DARWIN-32-PIC: call L95$pb
+; DARWIN-32-PIC-NEXT: L95$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L95$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat04:
+; DARWIN-64-STATIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat04:
+; DARWIN-64-DYNAMIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat04:
+; DARWIN-64-PIC: leaq _ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat05() nounwind {
@@ -906,16 +6239,153 @@ entry:
%1 = getelementptr i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bat05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: addq $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat05:
+; LINUX-32-STATIC: movl dptr, %eax
+; LINUX-32-STATIC-NEXT: addl $64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat05:
+; LINUX-32-PIC: movl dptr, %eax
+; LINUX-32-PIC-NEXT: addl $64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat05:
+; DARWIN-32-STATIC: movl _dptr, %eax
+; DARWIN-32-STATIC-NEXT: addl $64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat05:
+; DARWIN-32-DYNAMIC: movl _dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat05:
+; DARWIN-32-PIC: call L96$pb
+; DARWIN-32-PIC-NEXT: L96$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _dptr-L96$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat06() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat06:
+; LINUX-64-STATIC: movl $lsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat06:
+; LINUX-32-STATIC: movl $lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat06:
+; LINUX-32-PIC: movl $lsrc+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat06:
+; LINUX-64-PIC: leaq lsrc+64(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat06:
+; DARWIN-32-STATIC: movl $_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat06:
+; DARWIN-32-DYNAMIC: movl $_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat06:
+; DARWIN-32-PIC: call L97$pb
+; DARWIN-32-PIC-NEXT: L97$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L97$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat06:
+; DARWIN-64-STATIC: leaq _lsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat06:
+; DARWIN-64-DYNAMIC: leaq _lsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat06:
+; DARWIN-64-PIC: leaq _lsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat07() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat07:
+; LINUX-64-STATIC: movl $ldst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat07:
+; LINUX-32-STATIC: movl $ldst+64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat07:
+; LINUX-32-PIC: movl $ldst+64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat07:
+; LINUX-64-PIC: leaq ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat07:
+; DARWIN-32-STATIC: movl $_ldst+64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat07:
+; DARWIN-32-DYNAMIC: movl $_ldst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat07:
+; DARWIN-32-PIC: call L98$pb
+; DARWIN-32-PIC-NEXT: L98$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L98$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat07:
+; DARWIN-64-STATIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat07:
+; DARWIN-64-DYNAMIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat07:
+; DARWIN-64-PIC: leaq _ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bat08() nounwind {
@@ -924,21 +6394,217 @@ entry:
%1 = getelementptr i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bat08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: addq $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat08:
+; LINUX-32-STATIC: movl lptr, %eax
+; LINUX-32-STATIC-NEXT: addl $64, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bat08:
+; LINUX-32-PIC: movl lptr, %eax
+; LINUX-32-PIC-NEXT: addl $64, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bat08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: addq $64, %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bat08:
+; DARWIN-32-STATIC: movl _lptr, %eax
+; DARWIN-32-STATIC-NEXT: addl $64, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bat08:
+; DARWIN-32-DYNAMIC: movl _lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bat08:
+; DARWIN-32-PIC: call L99$pb
+; DARWIN-32-PIC-NEXT: L99$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl _lptr-L99$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: addl $64, %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bat08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: addq $64, %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bat08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bat08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: addq $64, %rax
+; DARWIN-64-PIC-NEXT: ret
}
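; The bam00-bam08 functions repeat the pattern with element 65536 (a +262144
; byte offset); here several models materialize the 262144 offset in the
; result register first and then add the symbol address from the GOT or
; non-lazy pointer.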
define i8* @bam00() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam00:
+; LINUX-64-STATIC: movl $src+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam00:
+; LINUX-32-STATIC: movl $src+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam00:
+; LINUX-32-PIC: movl $src+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam00:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam00:
+; DARWIN-32-STATIC: movl $_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam00:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam00:
+; DARWIN-32-PIC: call L100$pb
+; DARWIN-32-PIC-NEXT: L100$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl L_src$non_lazy_ptr-L100$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam00:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam00:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam00:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam01:
+; LINUX-64-STATIC: movl $dst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam01:
+; LINUX-32-STATIC: movl $dst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam01:
+; LINUX-32-PIC: movl $dst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam01:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam01:
+; DARWIN-32-STATIC: movl $_dst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam01:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam01:
+; DARWIN-32-PIC: call L101$pb
+; DARWIN-32-PIC-NEXT: L101$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L101$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam01:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam01:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam01:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bxm01() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bxm01:
+; LINUX-64-STATIC: movl $xdst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxm01:
+; LINUX-32-STATIC: movl $xdst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bxm01:
+; LINUX-32-PIC: movl $xdst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bxm01:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bxm01:
+; DARWIN-32-STATIC: movl $_xdst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bxm01:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bxm01:
+; DARWIN-32-PIC: call L102$pb
+; DARWIN-32-PIC-NEXT: L102$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L102$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bxm01:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bxm01:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bxm01:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam02() nounwind {
@@ -947,16 +6613,160 @@ entry:
%1 = getelementptr i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bam02:
+; LINUX-64-STATIC: movl $262144, %eax
+; LINUX-64-STATIC: addq ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam02:
+; LINUX-32-STATIC: movl $262144, %eax
+; LINUX-32-STATIC-NEXT: addl ptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam02:
+; LINUX-32-PIC: movl $262144, %eax
+; LINUX-32-PIC-NEXT: addl ptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq (%rcx), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam02:
+; DARWIN-32-STATIC: movl $262144, %eax
+; DARWIN-32-STATIC-NEXT: addl _ptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl (%ecx), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam02:
+; DARWIN-32-PIC: call L103$pb
+; DARWIN-32-PIC-NEXT: L103$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L103$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl (%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq (%rcx), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq (%rcx), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq (%rcx), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam03() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam03:
+; LINUX-64-STATIC: movl $dsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam03:
+; LINUX-32-STATIC: movl $dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam03:
+; LINUX-32-PIC: movl $dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam03:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam03:
+; DARWIN-32-STATIC: movl $_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam03:
+; DARWIN-32-DYNAMIC: movl $_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam03:
+; DARWIN-32-PIC: call L104$pb
+; DARWIN-32-PIC-NEXT: L104$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L104$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam03:
+; DARWIN-64-STATIC: leaq _dsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam03:
+; DARWIN-64-DYNAMIC: leaq _dsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam03:
+; DARWIN-64-PIC: leaq _dsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam04() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam04:
+; LINUX-64-STATIC: movl $ddst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam04:
+; LINUX-32-STATIC: movl $ddst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam04:
+; LINUX-32-PIC: movl $ddst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam04:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam04:
+; DARWIN-32-STATIC: movl $_ddst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam04:
+; DARWIN-32-DYNAMIC: movl $_ddst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam04:
+; DARWIN-32-PIC: call L105$pb
+; DARWIN-32-PIC-NEXT: L105$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ddst-L105$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam04:
+; DARWIN-64-STATIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam04:
+; DARWIN-64-DYNAMIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam04:
+; DARWIN-64-PIC: leaq _ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam05() nounwind {
@@ -965,16 +6775,153 @@ entry:
%1 = getelementptr i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bam05:
+; LINUX-64-STATIC: movl $262144, %eax
+; LINUX-64-STATIC: addq dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam05:
+; LINUX-32-STATIC: movl $262144, %eax
+; LINUX-32-STATIC-NEXT: addl dptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam05:
+; LINUX-32-PIC: movl $262144, %eax
+; LINUX-32-PIC-NEXT: addl dptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq (%rcx), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam05:
+; DARWIN-32-STATIC: movl $262144, %eax
+; DARWIN-32-STATIC-NEXT: addl _dptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam05:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl _dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam05:
+; DARWIN-32-PIC: call L106$pb
+; DARWIN-32-PIC-NEXT: L106$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl _dptr-L106$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam05:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam05:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam05:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam06() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam06:
+; LINUX-64-STATIC: movl $lsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam06:
+; LINUX-32-STATIC: movl $lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam06:
+; LINUX-32-PIC: movl $lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam06:
+; LINUX-64-PIC: leaq lsrc+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam06:
+; DARWIN-32-STATIC: movl $_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam06:
+; DARWIN-32-DYNAMIC: movl $_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam06:
+; DARWIN-32-PIC: call L107$pb
+; DARWIN-32-PIC-NEXT: L107$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L107$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam06:
+; DARWIN-64-STATIC: leaq _lsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam06:
+; DARWIN-64-DYNAMIC: leaq _lsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam06:
+; DARWIN-64-PIC: leaq _lsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam07() nounwind {
entry:
ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam07:
+; LINUX-64-STATIC: movl $ldst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam07:
+; LINUX-32-STATIC: movl $ldst+262144, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam07:
+; LINUX-32-PIC: movl $ldst+262144, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam07:
+; LINUX-64-PIC: leaq ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam07:
+; DARWIN-32-STATIC: movl $_ldst+262144, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam07:
+; DARWIN-32-DYNAMIC: movl $_ldst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam07:
+; DARWIN-32-PIC: call L108$pb
+; DARWIN-32-PIC-NEXT: L108$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal (_ldst-L108$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam07:
+; DARWIN-64-STATIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam07:
+; DARWIN-64-DYNAMIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam07:
+; DARWIN-64-PIC: leaq _ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @bam08() nounwind {
@@ -983,6 +6930,58 @@ entry:
%1 = getelementptr i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: bam08:
+; LINUX-64-STATIC: movl $262144, %eax
+; LINUX-64-STATIC: addq lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam08:
+; LINUX-32-STATIC: movl $262144, %eax
+; LINUX-32-STATIC-NEXT: addl lptr, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: bam08:
+; LINUX-32-PIC: movl $262144, %eax
+; LINUX-32-PIC-NEXT: addl lptr, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: bam08:
+; LINUX-64-PIC: movl $262144, %eax
+; LINUX-64-PIC-NEXT: addq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _bam08:
+; DARWIN-32-STATIC: movl $262144, %eax
+; DARWIN-32-STATIC-NEXT: addl _lptr, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _bam08:
+; DARWIN-32-DYNAMIC: movl $262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: addl _lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _bam08:
+; DARWIN-32-PIC: call L109$pb
+; DARWIN-32-PIC-NEXT: L109$pb:
+; DARWIN-32-PIC-NEXT: popl %ecx
+; DARWIN-32-PIC-NEXT: movl $262144, %eax
+; DARWIN-32-PIC-NEXT: addl _lptr-L109$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _bam08:
+; DARWIN-64-STATIC: movl $262144, %eax
+; DARWIN-64-STATIC-NEXT: addq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _bam08:
+; DARWIN-64-DYNAMIC: movl $262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: addq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _bam08:
+; DARWIN-64-PIC: movl $262144, %eax
+; DARWIN-64-PIC-NEXT: addq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat00(i64 %i) nounwind {
@@ -991,6 +6990,59 @@ entry:
%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat00:
+; LINUX-64-STATIC: leaq src+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal src+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal src+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _src+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat00:
+; DARWIN-32-PIC: call L110$pb
+; DARWIN-32-PIC-NEXT: L110$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L110$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxt00(i64 %i) nounwind {
@@ -999,6 +7051,59 @@ entry:
%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxt00:
+; LINUX-64-STATIC: leaq xsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxt00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxt00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxt00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxt00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxt00:
+; DARWIN-32-PIC: call L111$pb
+; DARWIN-32-PIC-NEXT: L111$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L111$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxt00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxt00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxt00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat01(i64 %i) nounwind {
@@ -1007,6 +7112,59 @@ entry:
%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat01:
+; LINUX-64-STATIC: leaq dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat01:
+; DARWIN-32-PIC: call L112$pb
+; DARWIN-32-PIC-NEXT: L112$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L112$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxt01(i64 %i) nounwind {
@@ -1015,6 +7173,59 @@ entry:
%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxt01:
+; LINUX-64-STATIC: leaq xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxt01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxt01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxt01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxt01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxt01:
+; DARWIN-32-PIC: call L113$pb
+; DARWIN-32-PIC-NEXT: L113$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L113$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxt01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxt01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxt01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat02(i64 %i) nounwind {
@@ -1024,6 +7235,69 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cat02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat02:
+; DARWIN-32-PIC: call L114$pb
+; DARWIN-32-PIC-NEXT: L114$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L114$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat03(i64 %i) nounwind {
@@ -1032,6 +7306,57 @@ entry:
%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat03:
+; LINUX-64-STATIC: leaq dsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _dsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat03:
+; DARWIN-32-PIC: call L115$pb
+; DARWIN-32-PIC-NEXT: L115$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L115$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat04(i64 %i) nounwind {
@@ -1040,6 +7365,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat04:
+; LINUX-64-STATIC: leaq ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat04:
+; DARWIN-32-PIC: call L116$pb
+; DARWIN-32-PIC-NEXT: L116$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L116$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat05(i64 %i) nounwind {
@@ -1049,6 +7425,64 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cat05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat05:
+; DARWIN-32-PIC: call L117$pb
+; DARWIN-32-PIC-NEXT: L117$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L117$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat06(i64 %i) nounwind {
@@ -1057,6 +7491,57 @@ entry:
%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat06:
+; LINUX-64-STATIC: leaq lsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal lsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal lsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _lsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _lsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat06:
+; DARWIN-32-PIC: call L118$pb
+; DARWIN-32-PIC-NEXT: L118$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L118$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat07(i64 %i) nounwind {
@@ -1065,6 +7550,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cat07:
+; LINUX-64-STATIC: leaq ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat07:
+; DARWIN-32-PIC: call L119$pb
+; DARWIN-32-PIC-NEXT: L119$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L119$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cat08(i64 %i) nounwind {
@@ -1074,6 +7610,63 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cat08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cat08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cat08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cat08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cat08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cat08:
+; DARWIN-32-PIC: call L120$pb
+; DARWIN-32-PIC-NEXT: L120$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L120$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cat08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cat08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cat08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam00(i64 %i) nounwind {
@@ -1082,6 +7675,59 @@ entry:
%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam00:
+; LINUX-64-STATIC: leaq src+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal src+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal src+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam00:
+; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _src+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam00:
+; DARWIN-32-PIC: call L121$pb
+; DARWIN-32-PIC-NEXT: L121$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L121$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam00:
+; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam00:
+; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam00:
+; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxm00(i64 %i) nounwind {
@@ -1090,6 +7736,59 @@ entry:
%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxm00:
+; LINUX-64-STATIC: leaq xsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm00:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxm00:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxm00:
+; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxm00:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxm00:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxm00:
+; DARWIN-32-PIC: call L122$pb
+; DARWIN-32-PIC-NEXT: L122$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L122$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxm00:
+; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxm00:
+; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxm00:
+; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam01(i64 %i) nounwind {
@@ -1098,6 +7797,59 @@ entry:
%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam01:
+; LINUX-64-STATIC: leaq dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam01:
+; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam01:
+; DARWIN-32-PIC: call L123$pb
+; DARWIN-32-PIC-NEXT: L123$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L123$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam01:
+; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam01:
+; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam01:
+; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cxm01(i64 %i) nounwind {
@@ -1106,6 +7858,59 @@ entry:
%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cxm01:
+; LINUX-64-STATIC: leaq xdst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm01:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal xdst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cxm01:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal xdst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cxm01:
+; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cxm01:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _xdst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cxm01:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cxm01:
+; DARWIN-32-PIC: call L124$pb
+; DARWIN-32-PIC-NEXT: L124$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L124$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cxm01:
+; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cxm01:
+; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cxm01:
+; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam02(i64 %i) nounwind {
@@ -1115,6 +7920,69 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cam02:
+; LINUX-64-STATIC: movq ptr(%rip), %rax
+; LINUX-64-STATIC: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam02:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl ptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam02:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl ptr, %ecx
+; LINUX-32-PIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam02:
+; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam02:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _ptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam02:
+; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: movl (%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam02:
+; DARWIN-32-PIC: call L125$pb
+; DARWIN-32-PIC-NEXT: L125$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L125$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: movl (%eax), %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam02:
+; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: movq (%rax), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam02:
+; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam02:
+; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: movq (%rax), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam03(i64 %i) nounwind {
@@ -1123,6 +7991,57 @@ entry:
%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam03:
+; LINUX-64-STATIC: leaq dsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam03:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal dsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam03:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal dsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam03:
+; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam03:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _dsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam03:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _dsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam03:
+; DARWIN-32-PIC: call L126$pb
+; DARWIN-32-PIC-NEXT: L126$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_dsrc-L126$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam03:
+; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam03:
+; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam03:
+; DARWIN-64-PIC: leaq _dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam04(i64 %i) nounwind {
@@ -1131,6 +8050,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam04:
+; LINUX-64-STATIC: leaq ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam04:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam04:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam04:
+; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam04:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam04:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam04:
+; DARWIN-32-PIC: call L127$pb
+; DARWIN-32-PIC-NEXT: L127$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ddst-L127$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam04:
+; DARWIN-64-STATIC: leaq _ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam04:
+; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam04:
+; DARWIN-64-PIC: leaq _ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam05(i64 %i) nounwind {
@@ -1140,6 +8110,64 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cam05:
+; LINUX-64-STATIC: movq dptr(%rip), %rax
+; LINUX-64-STATIC: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam05:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl dptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam05:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl dptr, %ecx
+; LINUX-32-PIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam05:
+; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: movq (%rax), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam05:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam05:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam05:
+; DARWIN-32-PIC: call L128$pb
+; DARWIN-32-PIC-NEXT: L128$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _dptr-L128$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam05:
+; DARWIN-64-STATIC: movq _dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam05:
+; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam05:
+; DARWIN-64-PIC: movq _dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam06(i64 %i) nounwind {
@@ -1148,6 +8176,57 @@ entry:
%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam06:
+; LINUX-64-STATIC: leaq lsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam06:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal lsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam06:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal lsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam06:
+; LINUX-64-PIC: leaq lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam06:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _lsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam06:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _lsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam06:
+; DARWIN-32-PIC: call L129$pb
+; DARWIN-32-PIC-NEXT: L129$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_lsrc-L129$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam06:
+; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam06:
+; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam06:
+; DARWIN-64-PIC: leaq _lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam07(i64 %i) nounwind {
@@ -1156,6 +8235,57 @@ entry:
%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
+; LINUX-64-STATIC: cam07:
+; LINUX-64-STATIC: leaq ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam07:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam07:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: leal ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam07:
+; LINUX-64-PIC: leaq ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam07:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam07:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: leal _ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam07:
+; DARWIN-32-PIC: call L130$pb
+; DARWIN-32-PIC-NEXT: L130$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: leal (_ldst-L130$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam07:
+; DARWIN-64-STATIC: leaq _ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam07:
+; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam07:
+; DARWIN-64-PIC: leaq _ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define i8* @cam08(i64 %i) nounwind {
@@ -1165,6 +8295,63 @@ entry:
%2 = getelementptr i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
+; LINUX-64-STATIC: cam08:
+; LINUX-64-STATIC: movq lptr(%rip), %rax
+; LINUX-64-STATIC: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam08:
+; LINUX-32-STATIC: movl 4(%esp), %eax
+; LINUX-32-STATIC-NEXT: movl lptr, %ecx
+; LINUX-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: cam08:
+; LINUX-32-PIC: movl 4(%esp), %eax
+; LINUX-32-PIC-NEXT: movl lptr, %ecx
+; LINUX-32-PIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: cam08:
+; LINUX-64-PIC: movq lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _cam08:
+; DARWIN-32-STATIC: movl 4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-STATIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _cam08:
+; DARWIN-32-DYNAMIC: movl 4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: movl _lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: leal 262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _cam08:
+; DARWIN-32-PIC: call L131$pb
+; DARWIN-32-PIC-NEXT: L131$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: movl _lptr-L131$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: leal 262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _cam08:
+; DARWIN-64-STATIC: movq _lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _cam08:
+; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _cam08:
+; DARWIN-64-PIC: movq _lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define void @lcallee() nounwind {
@@ -1177,6 +8364,123 @@ entry:
tail call void @x() nounwind
tail call void @x() nounwind
ret void
+; LINUX-64-STATIC: lcallee:
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: call x
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcallee:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: lcallee:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: lcallee:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: call x@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _lcallee:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _lcallee:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _lcallee:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _lcallee:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: call _x
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _lcallee:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: call _x
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _lcallee:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: call _x
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
declare void @x()
@@ -1191,6 +8495,123 @@ entry:
tail call void @y() nounwind
tail call void @y() nounwind
ret void
+; LINUX-64-STATIC: dcallee:
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: call y
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcallee:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dcallee:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dcallee:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: call y@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dcallee:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dcallee:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dcallee:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dcallee:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: call _y
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dcallee:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: call _y
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dcallee:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: call _y
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
declare void @y()
@@ -1198,6 +8619,48 @@ declare void @y()
define void ()* @address() nounwind {
entry:
ret void ()* @callee
+; LINUX-64-STATIC: address:
+; LINUX-64-STATIC: movl $callee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: address:
+; LINUX-32-STATIC: movl $callee, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: address:
+; LINUX-32-PIC: movl $callee, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: address:
+; LINUX-64-PIC: movq callee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _address:
+; DARWIN-32-STATIC: movl $_callee, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _address:
+; DARWIN-32-DYNAMIC: movl L_callee$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _address:
+; DARWIN-32-PIC: call L134$pb
+; DARWIN-32-PIC-NEXT: L134$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L134$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _address:
+; DARWIN-64-STATIC: movq _callee@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _address:
+; DARWIN-64-DYNAMIC: movq _callee@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _address:
+; DARWIN-64-PIC: movq _callee@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
declare void @callee()
@@ -1205,11 +8668,95 @@ declare void @callee()
define void ()* @laddress() nounwind {
entry:
ret void ()* @lcallee
+; LINUX-64-STATIC: laddress:
+; LINUX-64-STATIC: movl $lcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: laddress:
+; LINUX-32-STATIC: movl $lcallee, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: laddress:
+; LINUX-32-PIC: movl $lcallee, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: laddress:
+; LINUX-64-PIC: movq lcallee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _laddress:
+; DARWIN-32-STATIC: movl $_lcallee, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _laddress:
+; DARWIN-32-DYNAMIC: movl $_lcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _laddress:
+; DARWIN-32-PIC: call L135$pb
+; DARWIN-32-PIC-NEXT: L135$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _lcallee-L135$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _laddress:
+; DARWIN-64-STATIC: leaq _lcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _laddress:
+; DARWIN-64-DYNAMIC: leaq _lcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _laddress:
+; DARWIN-64-PIC: leaq _lcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define void ()* @daddress() nounwind {
entry:
ret void ()* @dcallee
+; LINUX-64-STATIC: daddress:
+; LINUX-64-STATIC: movl $dcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: daddress:
+; LINUX-32-STATIC: movl $dcallee, %eax
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: daddress:
+; LINUX-32-PIC: movl $dcallee, %eax
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: daddress:
+; LINUX-64-PIC: leaq dcallee(%rip), %rax
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _daddress:
+; DARWIN-32-STATIC: movl $_dcallee, %eax
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _daddress:
+; DARWIN-32-DYNAMIC: movl $_dcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _daddress:
+; DARWIN-32-PIC: call L136$pb
+; DARWIN-32-PIC-NEXT: L136$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: leal _dcallee-L136$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _daddress:
+; DARWIN-64-STATIC: leaq _dcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _daddress:
+; DARWIN-64-DYNAMIC: leaq _dcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _daddress:
+; DARWIN-64-PIC: leaq _dcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: ret
}
define void @caller() nounwind {
@@ -1217,6 +8764,73 @@ entry:
tail call void @callee() nounwind
tail call void @callee() nounwind
ret void
+; LINUX-64-STATIC: caller:
+; LINUX-64-STATIC: call callee
+; LINUX-64-STATIC: call callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: caller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: caller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: caller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call callee@PLT
+; LINUX-64-PIC-NEXT: call callee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _caller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _caller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _caller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _caller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _callee
+; DARWIN-64-STATIC-NEXT: call _callee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _caller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _callee
+; DARWIN-64-DYNAMIC-NEXT: call _callee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _caller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _callee
+; DARWIN-64-PIC-NEXT: call _callee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @dcaller() nounwind {
@@ -1224,6 +8838,73 @@ entry:
tail call void @dcallee() nounwind
tail call void @dcallee() nounwind
ret void
+; LINUX-64-STATIC: dcaller:
+; LINUX-64-STATIC: call dcallee
+; LINUX-64-STATIC: call dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call dcallee
+; LINUX-64-PIC-NEXT: call dcallee
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _dcallee
+; DARWIN-64-STATIC-NEXT: call _dcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _dcallee
+; DARWIN-64-DYNAMIC-NEXT: call _dcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _dcallee
+; DARWIN-64-PIC-NEXT: call _dcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @lcaller() nounwind {
@@ -1231,24 +8912,262 @@ entry:
tail call void @lcallee() nounwind
tail call void @lcallee() nounwind
ret void
+; LINUX-64-STATIC: lcaller:
+; LINUX-64-STATIC: call lcallee
+; LINUX-64-STATIC: call lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: lcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: lcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call lcallee@PLT
+; LINUX-64-PIC-NEXT: call lcallee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _lcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _lcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _lcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _lcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _lcallee
+; DARWIN-64-STATIC-NEXT: call _lcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _lcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _lcallee
+; DARWIN-64-DYNAMIC-NEXT: call _lcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _lcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _lcallee
+; DARWIN-64-PIC-NEXT: call _lcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @tailcaller() nounwind {
entry:
tail call void @callee() nounwind
ret void
+; LINUX-64-STATIC: tailcaller:
+; LINUX-64-STATIC: call callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: tailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: tailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: tailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call callee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _tailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _tailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _tailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _tailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _callee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _tailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _callee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _tailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _callee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @dtailcaller() nounwind {
entry:
tail call void @dcallee() nounwind
ret void
+; LINUX-64-STATIC: dtailcaller:
+; LINUX-64-STATIC: call dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dtailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dtailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dtailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call dcallee
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dtailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dtailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dtailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dtailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _dcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dtailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _dcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dtailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _dcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @ltailcaller() nounwind {
entry:
tail call void @lcallee() nounwind
ret void
+; LINUX-64-STATIC: ltailcaller:
+; LINUX-64-STATIC: call lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ltailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ltailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ltailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call lcallee@PLT
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ltailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ltailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ltailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ltailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call _lcallee
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ltailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call _lcallee
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ltailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call _lcallee
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @icaller() nounwind {
@@ -1258,6 +9177,86 @@ entry:
%1 = load void ()** @ifunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: icaller:
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: icaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: icaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: icaller:
+; LINUX-64-PIC: pushq %rbx
+; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: popq %rbx
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _icaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _icaller:
+; DARWIN-32-DYNAMIC: pushl %esi
+; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: popl %esi
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _icaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L143$pb
+; DARWIN-32-PIC-NEXT: L143$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L143$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _icaller:
+; DARWIN-64-STATIC: pushq %rbx
+; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: popq %rbx
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _icaller:
+; DARWIN-64-DYNAMIC: pushq %rbx
+; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: popq %rbx
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _icaller:
+; DARWIN-64-PIC: pushq %rbx
+; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: popq %rbx
+; DARWIN-64-PIC-NEXT: ret
}
define void @dicaller() nounwind {
@@ -1267,6 +9266,79 @@ entry:
%1 = load void ()** @difunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: dicaller:
+; LINUX-64-STATIC: call *difunc
+; LINUX-64-STATIC: call *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dicaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: dicaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: dicaller:
+; LINUX-64-PIC: pushq %rbx
+; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: popq %rbx
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _dicaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _dicaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _dicaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L144$pb
+; DARWIN-32-PIC-NEXT: L144$pb:
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: call *_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _dicaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _dicaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _dicaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-PIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @licaller() nounwind {
@@ -1276,6 +9348,78 @@ entry:
%1 = load void ()** @lifunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: licaller:
+; LINUX-64-STATIC: call *lifunc
+; LINUX-64-STATIC: call *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: licaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: licaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: licaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call *lifunc(%rip)
+; LINUX-64-PIC-NEXT: call *lifunc(%rip)
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _licaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _licaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _licaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L145$pb
+; DARWIN-32-PIC-NEXT: L145$pb:
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: call *_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: call *_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _licaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _licaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _licaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @itailcaller() nounwind {
@@ -1285,6 +9429,86 @@ entry:
%1 = load void ()** @ifunc, align 8
tail call void %1() nounwind
ret void
+; LINUX-64-STATIC: itailcaller:
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: call *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: itailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: itailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: itailcaller:
+; LINUX-64-PIC: pushq %rbx
+; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: call *(%rbx)
+; LINUX-64-PIC-NEXT: popq %rbx
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _itailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _itailcaller:
+; DARWIN-32-DYNAMIC: pushl %esi
+; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
+; DARWIN-32-DYNAMIC-NEXT: popl %esi
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _itailcaller:
+; DARWIN-32-PIC: pushl %esi
+; DARWIN-32-PIC-NEXT: subl $8, %esp
+; DARWIN-32-PIC-NEXT: call L146$pb
+; DARWIN-32-PIC-NEXT: L146$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L146$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: addl $8, %esp
+; DARWIN-32-PIC-NEXT: popl %esi
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _itailcaller:
+; DARWIN-64-STATIC: pushq %rbx
+; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: call *(%rbx)
+; DARWIN-64-STATIC-NEXT: popq %rbx
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _itailcaller:
+; DARWIN-64-DYNAMIC: pushq %rbx
+; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: call *(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: popq %rbx
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _itailcaller:
+; DARWIN-64-PIC: pushq %rbx
+; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: call *(%rbx)
+; DARWIN-64-PIC-NEXT: popq %rbx
+; DARWIN-64-PIC-NEXT: ret
}
define void @ditailcaller() nounwind {
@@ -1292,6 +9516,66 @@ entry:
%0 = load void ()** @difunc, align 8
tail call void %0() nounwind
ret void
+; LINUX-64-STATIC: ditailcaller:
+; LINUX-64-STATIC: call *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ditailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: ditailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: ditailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: call *(%rax)
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _ditailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _ditailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _ditailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L147$pb
+; DARWIN-32-PIC-NEXT: L147$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: call *_difunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _ditailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _ditailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _ditailcaller:
+; DARWIN-64-PIC: call *_difunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
define void @litailcaller() nounwind {
@@ -1299,4 +9583,64 @@ entry:
%0 = load void ()** @lifunc, align 8
tail call void %0() nounwind
ret void
+; LINUX-64-STATIC: litailcaller:
+; LINUX-64-STATIC: call *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: litailcaller:
+; LINUX-32-STATIC: subl $4, %esp
+; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: addl $4, %esp
+; LINUX-32-STATIC-NEXT: ret
+
+; LINUX-32-PIC: litailcaller:
+; LINUX-32-PIC: subl $4, %esp
+; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: addl $4, %esp
+; LINUX-32-PIC-NEXT: ret
+
+; LINUX-64-PIC: litailcaller:
+; LINUX-64-PIC: subq $8, %rsp
+; LINUX-64-PIC-NEXT: call *lifunc(%rip)
+; LINUX-64-PIC-NEXT: addq $8, %rsp
+; LINUX-64-PIC-NEXT: ret
+
+; DARWIN-32-STATIC: _litailcaller:
+; DARWIN-32-STATIC: subl $12, %esp
+; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: addl $12, %esp
+; DARWIN-32-STATIC-NEXT: ret
+
+; DARWIN-32-DYNAMIC: _litailcaller:
+; DARWIN-32-DYNAMIC: subl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
+; DARWIN-32-DYNAMIC-NEXT: ret
+
+; DARWIN-32-PIC: _litailcaller:
+; DARWIN-32-PIC: subl $12, %esp
+; DARWIN-32-PIC-NEXT: call L148$pb
+; DARWIN-32-PIC-NEXT: L148$pb:
+; DARWIN-32-PIC-NEXT: popl %eax
+; DARWIN-32-PIC-NEXT: call *_lifunc-L148$pb(%eax)
+; DARWIN-32-PIC-NEXT: addl $12, %esp
+; DARWIN-32-PIC-NEXT: ret
+
+; DARWIN-64-STATIC: _litailcaller:
+; DARWIN-64-STATIC: subq $8, %rsp
+; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: addq $8, %rsp
+; DARWIN-64-STATIC-NEXT: ret
+
+; DARWIN-64-DYNAMIC: _litailcaller:
+; DARWIN-64-DYNAMIC: subq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: ret
+
+; DARWIN-64-PIC: _litailcaller:
+; DARWIN-64-PIC: subq $8, %rsp
+; DARWIN-64-PIC-NEXT: call *_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: addq $8, %rsp
+; DARWIN-64-PIC-NEXT: ret
}
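
The long run of CHECK lines ending here converts abi-isel.ll's address/call tests to FileCheck, with one prefix per target and relocation-model combination. As a hedged illustration of the pattern only, and not part of the patch itself, a reduced standalone test exercising just the x86-64 static case could look like the sketch below; the names address2 and callee2 are invented for the example.

; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static | FileCheck %s

define void ()* @address2() nounwind {
entry:
  ret void ()* @callee2
; In the small static code model the function address fits in a 32-bit immediate.
; CHECK: address2:
; CHECK: movl $callee2, %eax
; CHECK: ret
}

declare void @callee2()

The LINUX-64-STATIC lines above check exactly this movl-of-the-symbol form for @address, while the PIC and Darwin dynamic prefixes expect GOT or non-lazy-pointer loads instead.
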
diff --git a/test/CodeGen/X86/add-trick32.ll b/test/CodeGen/X86/add-trick32.ll
index 42909b4..e86045d 100644
--- a/test/CodeGen/X86/add-trick32.ll
+++ b/test/CodeGen/X86/add-trick32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: not grep add %t
; RUN: grep subl %t | count 1
diff --git a/test/CodeGen/X86/add-trick64.ll b/test/CodeGen/X86/add-trick64.ll
index 5466d9d..2f1fcee 100644
--- a/test/CodeGen/X86/add-trick64.ll
+++ b/test/CodeGen/X86/add-trick64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep add %t
; RUN: grep subq %t | count 2
diff --git a/test/CodeGen/X86/add-with-overflow.ll b/test/CodeGen/X86/add-with-overflow.ll
index d015ceb..0f705dc 100644
--- a/test/CodeGen/X86/add-with-overflow.ll
+++ b/test/CodeGen/X86/add-with-overflow.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep {jb} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {jo} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {jb} | count 2
+; RUN: llc < %s -march=x86 | grep {jo} | count 2
+; RUN: llc < %s -march=x86 | grep {jb} | count 2
+; RUN: llc < %s -march=x86 -O0 | grep {jo} | count 2
+; RUN: llc < %s -march=x86 -O0 | grep {jb} | count 2
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 3aadd05..0b26859 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t -f
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t
; RUN: grep set %t | count 7
; RUN: grep globl %t | count 6
; RUN: grep weak %t | count 1
diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll
index b2dc77d..c0f3a81 100644
--- a/test/CodeGen/X86/aligned-comm.ll
+++ b/test/CodeGen/X86/aligned-comm.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep {array,16512,7}
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep {array,16512,7}
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin8 | not grep {7}
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin8 | not grep {7}
; Darwin 9+ should get alignment on common symbols. Darwin8 does
; not support this.
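
For context on the comment above: the greps look for an "array,16512,7" common-symbol directive, that is, a 16512-byte common symbol with 2^7 = 128-byte alignment. A hypothetical global of that shape (the actual test body lies outside this hunk) would be:

@array = common global [4128 x i32] zeroinitializer, align 128
; 4128 x 4 bytes = 16512 bytes; on darwin9 and later this should be emitted with
; an alignment field of 7 (log2 of 128), which the darwin8 RUN line checks is absent.
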
diff --git a/test/CodeGen/X86/all-ones-vector.ll b/test/CodeGen/X86/all-ones-vector.ll
index 01c0e36..10fecad 100644
--- a/test/CodeGen/X86/all-ones-vector.ll
+++ b/test/CodeGen/X86/all-ones-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
define <4 x i32> @ioo() nounwind {
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 0bd97c2..f45e9b8 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
declare void @bar(<2 x i64>* %n)
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
index 3501047..7733b8a 100644
--- a/test/CodeGen/X86/and-or-fold.ll
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86 | grep and | count 1
; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1
; in this case.
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
index bdc8454..b5ac23b 100644
--- a/test/CodeGen/X86/and-su.ll
+++ b/test/CodeGen/X86/and-su.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%} | count 1
+; RUN: llc < %s -march=x86 | grep {(%} | count 1
; Don't duplicate the load.
diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll
index e8c3cf0..0cf169e 100644
--- a/test/CodeGen/X86/anyext-uses.ll
+++ b/test/CodeGen/X86/anyext-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep mov %t | count 8
; RUN: not grep implicit %t
diff --git a/test/CodeGen/X86/anyext.ll b/test/CodeGen/X86/anyext.ll
new file mode 100644
index 0000000..106fe83
--- /dev/null
+++ b/test/CodeGen/X86/anyext.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 | grep movzbl | count 2
+
+; Use movzbl to avoid partial-register updates.
+
+define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
+ %q = trunc i32 %p to i8
+ %r = udiv i8 %q, %x
+ %s = zext i8 %r to i32
+ %t = and i32 %s, 1
+ ret i32 %t
+}
+define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
+ %q = trunc i32 %p to i16
+ %r = udiv i16 %q, %x
+ %s = zext i16 %r to i32
+ %t = and i32 %s, 1
+ ret i32 %t
+}
diff --git a/test/CodeGen/X86/arg-cast.ll b/test/CodeGen/X86/arg-cast.ll
index 2e2bc3c..c111514 100644
--- a/test/CodeGen/X86/arg-cast.ll
+++ b/test/CodeGen/X86/arg-cast.ll
@@ -1,7 +1,7 @@
; This should compile to movl $2147483647, %eax + andl only.
-; RUN: llvm-as < %s | llc | grep andl
-; RUN: llvm-as < %s | llc | not grep movsd
-; RUN: llvm-as < %s | llc | grep esp | not grep add
+; RUN: llc < %s | grep andl
+; RUN: llc < %s | not grep movsd
+; RUN: llc < %s | grep esp | not grep add
; rdar://5736574
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
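
The expected-output comment at the top of this hunk ("movl $2147483647, %eax + andl only") corresponds to code that extracts the upper 32 bits of a double and clears the sign bit. A hypothetical reduced version, assuming a shape like the following (the real function body is outside this hunk):

define i32 @abs_hi_sketch(double %x) nounwind {
entry:
  %bits = bitcast double %x to i64
  %hi64 = lshr i64 %bits, 32
  %hi   = trunc i64 %hi64 to i32
  %abs  = and i32 %hi, 2147483647   ; clear the sign bit (0x7fffffff)
  ret i32 %abs
}

With the i386 datalayout above, the incoming double already sits on the stack, so the high word can be and'ed straight from memory; the RUN lines then confirm there is an andl, no movsd, and no extra esp adjustment.
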
diff --git a/test/CodeGen/X86/asm-block-labels.ll b/test/CodeGen/X86/asm-block-labels.ll
index 284a9fb..a43d430 100644
--- a/test/CodeGen/X86/asm-block-labels.ll
+++ b/test/CodeGen/X86/asm-block-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc
+; RUN: opt < %s -std-compile-opts | llc
; ModuleID = 'block12.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
index 333c768..96da224 100644
--- a/test/CodeGen/X86/asm-global-imm.ll
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep {test1 \$_GV}
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep {test2 _GV}
; PR882
diff --git a/test/CodeGen/X86/asm-indirect-mem.ll b/test/CodeGen/X86/asm-indirect-mem.ll
index 7f3353f..c57aa99 100644
--- a/test/CodeGen/X86/asm-indirect-mem.ll
+++ b/test/CodeGen/X86/asm-indirect-mem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2267
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-modifier-P.ll b/test/CodeGen/X86/asm-modifier-P.ll
new file mode 100644
index 0000000..6139da8
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier-P.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC-32
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-32
+; RUN: llc < %s -march=x86-64 -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-64
+; RUN: llc < %s -march=x86-64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC-64
+; PR3379
+; XFAIL: *
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@G = external global i32 ; <i32*> [#uses=1]
+
+declare void @bar(...)
+
+; extern int G;
+; void test1() {
+; asm("frob %0 x" : : "m"(G));
+; asm("frob %P0 x" : : "m"(G));
+;}
+
+define void @test1() nounwind {
+entry:
+; P suffix removes (rip) in -static 64-bit mode.
+
+; CHECK-PIC-64: test1:
+; CHECK-PIC-64: movq G@GOTPCREL(%rip), %rax
+; CHECK-PIC-64: frob (%rax) x
+; CHECK-PIC-64: frob (%rax) x
+
+; CHECK-STATIC-64: test1:
+; CHECK-STATIC-64: frob G(%rip) x
+; CHECK-STATIC-64: frob G x
+
+; CHECK-PIC-32: test1:
+; CHECK-PIC-32: frob G x
+; CHECK-PIC-32: frob G x
+
+; CHECK-STATIC-32: test1:
+; CHECK-STATIC-32: frob G x
+; CHECK-STATIC-32: frob G x
+
+ call void asm "frob $0 x", "*m"(i32* @G) nounwind
+ call void asm "frob ${0:P} x", "*m"(i32* @G) nounwind
+ ret void
+}
+
+define void @test3() nounwind {
+entry:
+; CHECK-STATIC-64: test3:
+; CHECK-STATIC-64: call bar
+; CHECK-STATIC-64: call test3
+; CHECK-STATIC-64: call $bar
+; CHECK-STATIC-64: call $test3
+
+; CHECK-STATIC-32: test3:
+; CHECK-STATIC-32: call bar
+; CHECK-STATIC-32: call test3
+; CHECK-STATIC-32: call $bar
+; CHECK-STATIC-32: call $test3
+
+; CHECK-PIC-64: test3:
+; CHECK-PIC-64: call bar@PLT
+; CHECK-PIC-64: call test3@PLT
+; CHECK-PIC-64: call $bar
+; CHECK-PIC-64: call $test3
+
+; CHECK-PIC-32: test3:
+; CHECK-PIC-32: call bar@PLT
+; CHECK-PIC-32: call test3@PLT
+; CHECK-PIC-32: call $bar
+; CHECK-PIC-32: call $test3
+
+
+; asm(" blah %P0" : : "X"(bar));
+ tail call void asm sideeffect "call ${0:P}", "X"(void (...)* @bar) nounwind
+ tail call void asm sideeffect "call ${0:P}", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+ tail call void asm sideeffect "call $0", "X"(void (...)* @bar) nounwind
+ tail call void asm sideeffect "call $0", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/asm-modifier.ll b/test/CodeGen/X86/asm-modifier.ll
new file mode 100644
index 0000000..44f972e
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'asm.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @test1() nounwind {
+entry:
+; CHECK: test1:
+; CHECK: movw %gs:6, %ax
+ %asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1]
+ %0 = zext i16 %asmtmp.i to i32 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define zeroext i16 @test2(i32 %address) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: movw %gs:(%eax), %ax
+ %asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1]
+ ret i16 %asmtmp
+}
+
+@n = global i32 42 ; <i32*> [#uses=3]
+@y = common global i32 0 ; <i32*> [#uses=3]
+
+define void @test3() nounwind {
+entry:
+; CHECK: test3:
+; CHECK: movl _n, %eax
+ call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @n) nounwind
+ ret void
+}
+
+define void @test4() nounwind {
+entry:
+; CHECK: test4:
+; CHECK: movl L_y$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+ call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @y) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
new file mode 100644
index 0000000..d00f8e8
--- /dev/null
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://7103704
+
+define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @inc4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @add8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add8:
+; CHECK: addq $2
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 2) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add4:
+; CHECK: addq
+ %0 = sext i32 %v to i64 ; <i64> [#uses=1]
+ %1 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 %0) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @inc3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc3:
+; CHECK: incb
+ %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 1) ; <i8> [#uses=0]
+ ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @add7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add7:
+; CHECK: addb $2
+ %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 2) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add3:
+; CHECK: addb
+ %0 = trunc i32 %v to i8 ; <i8> [#uses=1]
+ %1 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 %0) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @inc2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc2:
+; CHECK: incw
+ %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 1) ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @add6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add6:
+; CHECK: addw $2
+ %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 2) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add2:
+; CHECK: addw
+ %0 = trunc i32 %v to i16 ; <i16> [#uses=1]
+ %1 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @inc1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc1:
+; CHECK: incl
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 1) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @add5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add5:
+; CHECK: addl $2
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add1:
+; CHECK: addl
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @dec4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec4:
+; CHECK: decq
+ %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @sub8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub8:
+; CHECK: subq $2
+ %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 2) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub4:
+; CHECK: subq
+ %0 = sext i32 %v to i64 ; <i64> [#uses=1]
+ %1 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 %0) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @dec3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec3:
+; CHECK: decb
+ %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 1) ; <i8> [#uses=0]
+ ret void
+}
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @sub7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub7:
+; CHECK: subb $2
+ %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 2) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub3:
+; CHECK: subb
+ %0 = trunc i32 %v to i8 ; <i8> [#uses=1]
+ %1 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 %0) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @dec2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec2:
+; CHECK: decw
+ %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 1) ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @sub6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub6:
+; CHECK: subw $2
+ %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 2) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub2:
+; CHECK: subw
+ %0 = trunc i32 %v to i16 ; <i16> [#uses=1]
+ %1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @dec1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec1:
+; CHECK: decl
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 1) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @sub5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub5:
+; CHECK: subl $2
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 2) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 6871a08..3ef1887 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t1 -f
+; RUN: llc < %s -march=x86 -o %t1
; RUN: grep "lock" %t1 | count 17
; RUN: grep "xaddl" %t1 | count 4
; RUN: grep "cmpxchgl" %t1 | count 13
diff --git a/test/CodeGen/X86/attribute-sections.ll b/test/CodeGen/X86/attribute-sections.ll
new file mode 100644
index 0000000..3035334
--- /dev/null
+++ b/test/CodeGen/X86/attribute-sections.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+declare i32 @foo()
+@G0 = global i32 ()* @foo, section ".init_array"
+
+; LINUX: .section .init_array,"aw"
+; LINUX: .globl G0
+
+@G1 = global i32 ()* @foo, section ".fini_array"
+
+; LINUX: .section .fini_array,"aw"
+; LINUX: .globl G1
+
+@G2 = global i32 ()* @foo, section ".preinit_array"
+
+; LINUX: .section .preinit_array,"aw"
+; LINUX: .globl G2
+
diff --git a/test/CodeGen/X86/avoid-lea-scale2.ll b/test/CodeGen/X86/avoid-lea-scale2.ll
new file mode 100644
index 0000000..8003de2
--- /dev/null
+++ b/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64 | grep {leal.*-2(\[%\]rdi,\[%\]rdi)}
+
+define i32 @foo(i32 %x) nounwind readnone {
+ %t0 = shl i32 %x, 1
+ %t1 = add i32 %t0, -2
+ ret i32 %t1
+}
+
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
index 9f0aeb3..03e69e7 100644
--- a/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep align | count 3
+; RUN: llc < %s -march=x86 | grep align | count 3
@x = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index dfc5818..3e68f94 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep align | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep align | count 1
@A = common global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/bitcast-int-to-vector.ll b/test/CodeGen/X86/bitcast-int-to-vector.ll
index 370bec0..4c25979 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i1 @foo(i64 %a)
{
diff --git a/test/CodeGen/X86/bitcast.ll b/test/CodeGen/X86/bitcast.ll
index f575409..c34c675 100644
--- a/test/CodeGen/X86/bitcast.ll
+++ b/test/CodeGen/X86/bitcast.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
; PR1033
define i64 @test1(double %t) {
diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll
index 3e26931..48922b5 100644
--- a/test/CodeGen/X86/bitcast2.ll
+++ b/test/CodeGen/X86/bitcast2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep rsp
+; RUN: llc < %s -march=x86-64 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 | not grep rsp
define i64 @test1(double %A) {
%B = bitcast double %A to i64
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index b9ce10f..6b245c1 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-post-RA-scheduler=false -break-anti-dependencies=false > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=false > %t
; RUN: grep {%xmm0} %t | count 14
; RUN: not grep {%xmm1} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-post-RA-scheduler=false -break-anti-dependencies > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies > %t
; RUN: grep {%xmm0} %t | count 7
; RUN: grep {%xmm1} %t | count 7
diff --git a/test/CodeGen/X86/bss_pagealigned.ll b/test/CodeGen/X86/bss_pagealigned.ll
new file mode 100644
index 0000000..4a1049b
--- /dev/null
+++ b/test/CodeGen/X86/bss_pagealigned.ll
@@ -0,0 +1,21 @@
+; RUN: llc --code-model=kernel -march=x86-64 <%s | FileCheck %s
+; PR4933
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+%struct.kmem_cache_order_objects = type { i64 }
+declare i8* @memset(i8*, i32, i64)
+define void @unxlate_dev_mem_ptr(i64 %phis, i8* %addr) nounwind {
+ %pte.addr.i = alloca %struct.kmem_cache_order_objects*
+ %call8 = call i8* @memset(i8* bitcast ([512 x %struct.kmem_cache_order_objects]* @bm_pte to i8*), i32 0, i64 4096)
+; CHECK: movq $bm_pte, %rdi
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movl $4096, %edx
+; CHECK-NEXT: call memset
+ ret void
+}
+@bm_pte = internal global [512 x %struct.kmem_cache_order_objects] zeroinitializer, section ".bss.page_aligned", align 4096
+; CHECK: .section .bss.page_aligned,"aw",@nobits
+; CHECK-NEXT: .align 4096
+; CHECK-NEXT: bm_pte:
+; CHECK-NEXT: .zero 4096
+; CHECK-NEXT: .size bm_pte, 4096
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 91f8310..5bf58fa 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep APP %t
; RUN: grep bswapq %t | count 2
; RUN: grep bswapl %t | count 1
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index 592e25b..0a72c1c 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,8 +1,8 @@
; bswap should be constant folded when it is passed a constant argument
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep bswapl | count 3
-; RUN: llvm-as < %s | llc -march=x86 | grep rolw | count 1
+; RUN: llc < %s -march=x86 | grep rolw | count 1
declare i16 @llvm.bswap.i16(i16)
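
The first comment in this hunk says a bswap of a constant should be constant folded. A hypothetical example of such a call, not part of the test, is sketched below; the swapped value can be computed at compile time, so no bswapl should be needed for it.

declare i32 @llvm.bswap.i32(i32)

define i32 @swapped_constant_sketch() nounwind {
entry:
  ; 0x12345678 byte-swapped is 0x78563412; the result is a compile-time constant.
  %r = call i32 @llvm.bswap.i32(i32 305419896)
  ret i32 %r
}
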
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index a76242c..ec447e5 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep btl | count 28
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium4 | grep btl | not grep esp
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn | grep btl | not grep esp
+; RUN: llc < %s -march=x86 | grep btl | count 28
+; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp
+; RUN: llc < %s -march=x86 -mcpu=penryn | grep btl | not grep esp
; PR3253
; The register+memory form of the BT instruction should be usable on
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index a75214a..af36e1b 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq 8(%rsp), %rax}
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86-64 | grep {movq 8(%rsp), %rax}
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {movl 8(%esp), %edx} %t
; RUN: grep {movl 4(%esp), %eax} %t
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index f85c8ff..71129f5 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 707a4c5..504e0be 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32,
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 5576c36..4db9d65 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
i16, i16, i16, i16, i16, i16, i16, i16,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index c6f4588..69c115b 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index 47269d2..b060369 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add | not grep 16
+; RUN: llc < %s -march=x86 | grep add | not grep 16
%struct.W = type { x86_fp80, x86_fp80 }
@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 6b64c6c..0da93ba 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
%struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
<2 x i64> }
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 6e9c70d..87785bc 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
-; RUN: llvm-as < %s | llc -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
; Call to immediate is not safe on x86-64 unless we *know* that the
; call will be within 32-bits pcrel from the dest immediate.
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {call.*\*%rax}
+; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax}
; PR3666
; PR3773
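
For context, the "call to immediate" the comment above refers to is a call through a constant integer address. A minimal hypothetical sketch (not the actual test body, which lies outside this hunk):

define void @call_imm_sketch() nounwind {
entry:
  call void inttoptr (i64 12345678 to void ()*)() nounwind
  ret void
}

The 32-bit static and dynamic-no-pic RUN lines grep for a direct call of the literal address, while the x86-64 RUN line expects the address to be materialized in a register and called indirectly (call.*\*%rax), matching the comment's point about 32-bit pc-relative reach.
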
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index ad9b796..7bae5cd 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -disable-fp-elim | grep subl | count 1
+; RUN: llc < %s -march=x86 -disable-fp-elim | grep subl | count 1
%struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
%struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
index 87194d6..d520a6f 100644
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ b/test/CodeGen/X86/change-compare-stride-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
-; RUN: grep {cmpl \$4294966818,} %t
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {cmpl \$-478,} %t
; RUN: not grep inc %t
; RUN: not grep {leal 1(} %t
; RUN: not grep {leal -1(} %t
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 49b691f..a9ddbdb 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep {cmpq \$-478,} %t
; RUN: not grep inc %t
; RUN: not grep {leal 1(} %t
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index c3b3b41..3f27187 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep bsr | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep bsf
-; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 3
+; RUN: llc < %s -march=x86 | grep bsr | count 2
+; RUN: llc < %s -march=x86 | grep bsf
+; RUN: llc < %s -march=x86 | grep cmov | count 3
define i32 @t1(i32 %x) nounwind {
%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
new file mode 100644
index 0000000..f3c9a7a
--- /dev/null
+++ b/test/CodeGen/X86/cmov.ll
@@ -0,0 +1,157 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test1:
+; CHECK: btl
+; CHECK-NEXT: movl $12, %eax
+; CHECK-NEXT: cmovae (%rcx), %eax
+; CHECK-NEXT: ret
+
+ %0 = lshr i32 %x, %n ; <i32> [#uses=1]
+ %1 = and i32 %0, 1 ; <i32> [#uses=1]
+ %toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1]
+ %v = load i32* %vp
+ %.0 = select i1 %toBool, i32 %v, i32 12 ; <i32> [#uses=1]
+ ret i32 %.0
+}
+define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test2:
+; CHECK: btl
+; CHECK-NEXT: movl $12, %eax
+; CHECK-NEXT: cmovb (%rcx), %eax
+; CHECK-NEXT: ret
+
+ %0 = lshr i32 %x, %n ; <i32> [#uses=1]
+ %1 = and i32 %0, 1 ; <i32> [#uses=1]
+ %toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1]
+ %v = load i32* %vp
+ %.0 = select i1 %toBool, i32 12, i32 %v ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+
+; x86's 32-bit cmov doesn't clobber the high 32 bits of the destination
+; if the condition is false. An explicit zero-extend (movl) is needed
+; after the cmov.
+
+declare void @bar(i64) nounwind
+
+define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
+; CHECK: test3:
+; CHECK: cmovne %edi, %esi
+; CHECK-NEXT: movl %esi, %edi
+
+ %c = trunc i64 %a to i32
+ %d = trunc i64 %b to i32
+ %e = select i1 %p, i32 %c, i32 %d
+ %f = zext i32 %e to i64
+ call void @bar(i64 %f)
+ ret void
+}
+
+
+
+; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional
+; move without recomputing EFLAGS, because the expansion of the conditional
+; move with control flow may clobber EFLAGS (e.g., with xor, to set the
+; register to zero).
+
+; The test is a little awkward; the important part is that there's a test before the
+; setne.
+; PR4814
+
+
+@g_3 = external global i8 ; <i8*> [#uses=1]
+@g_96 = external global i8 ; <i8*> [#uses=2]
+@g_100 = external global i8 ; <i8*> [#uses=2]
+@_2E_str = external constant [15 x i8], align 1 ; <[15 x i8]*> [#uses=1]
+
+define i32 @test4() nounwind {
+entry:
+ %0 = load i8* @g_3, align 1 ; <i8> [#uses=2]
+ %1 = sext i8 %0 to i32 ; <i32> [#uses=1]
+ %.lobit.i = lshr i8 %0, 7 ; <i8> [#uses=1]
+ %tmp.i = zext i8 %.lobit.i to i32 ; <i32> [#uses=1]
+ %tmp.not.i = xor i32 %tmp.i, 1 ; <i32> [#uses=1]
+ %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i ; <i32> [#uses=1]
+ %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1]
+ %2 = icmp eq i8 %retval56.i.i, 0 ; <i1> [#uses=2]
+ %g_96.promoted.i = load i8* @g_96 ; <i8> [#uses=3]
+ %3 = icmp eq i8 %g_96.promoted.i, 0 ; <i1> [#uses=2]
+ br i1 %3, label %func_4.exit.i, label %bb.i.i.i
+
+bb.i.i.i: ; preds = %entry
+ %4 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ br label %func_4.exit.i
+
+; CHECK: test4:
+; CHECK: g_100
+; CHECK: testb
+; CHECK: testb %al, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb
+
+func_4.exit.i: ; preds = %bb.i.i.i, %entry
+ %.not.i = xor i1 %2, true ; <i1> [#uses=1]
+ %brmerge.i = or i1 %3, %.not.i ; <i1> [#uses=1]
+ %.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0 ; <i8> [#uses=1]
+ br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
+
+bb.i.i: ; preds = %func_4.exit.i
+ %5 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ br label %func_1.exit
+
+func_1.exit: ; preds = %bb.i.i, %func_4.exit.i
+ %g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2]
+ store i8 %g_96.tmp.0.i, i8* @g_96
+ %6 = zext i8 %g_96.tmp.0.i to i32 ; <i32> [#uses=1]
+ %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+
+; Should compile to setcc | -2.
+; rdar://6668608
+define i32 @test5(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test5:
+; CHECK: setg %al
+; CHECK: movzbl %al, %eax
+; CHECK: orl $-2, %eax
+; CHECK: ret
+
+ %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %1, i32 -1, i32 -2 ; <i32> [#uses=1]
+ ret i32 %iftmp.0.0
+}
+
+define i32 @test6(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test6:
+; CHECK: setl %al
+; CHECK: movzbl %al, %eax
+; CHECK: leal 4(%rax,%rax,8), %eax
+; CHECK: ret
+ %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %1, i32 4, i32 13 ; <i32> [#uses=1]
+ ret i32 %iftmp.0.0
+}
+
+
+; Don't try to use a 16-bit conditional move to do an 8-bit select,
+; because it isn't worth it. Just use a branch instead.
+define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
+; CHECK: test7:
+; CHECK: testb $1, %dil
+; CHECK-NEXT: jne LBB
+
+ %d = select i1 %c, i8 %a, i8 %b
+ ret i8 %d
+}
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
index 91c8a87..898c09b 100644
--- a/test/CodeGen/X86/cmp-test.ll
+++ b/test/CodeGen/X86/cmp-test.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep test | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep test | count 1
define i32 @f1(i32 %X, i32* %y) {
%tmp = load i32* %y ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
index f66f90c..de89374 100644
--- a/test/CodeGen/X86/cmp0.ll
+++ b/test/CodeGen/X86/cmp0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | grep -v cmp
define i64 @foo(i64 %x) {
%t = icmp eq i64 %x, 0
diff --git a/test/CodeGen/X86/cmp1.ll b/test/CodeGen/X86/cmp1.ll
index 241618c5..d4aa399 100644
--- a/test/CodeGen/X86/cmp1.ll
+++ b/test/CodeGen/X86/cmp1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | grep -v cmp
define i64 @foo(i64 %x) {
%t = icmp slt i64 %x, 1
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
index 2c046ff..9a8e00c 100644
--- a/test/CodeGen/X86/cmp2.ll
+++ b/test/CodeGen/X86/cmp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
define i32 @test(double %A) nounwind {
entry:
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
new file mode 100644
index 0000000..0fe4e56
--- /dev/null
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | grep {movl %esp, %eax}
+; PR4572
+
+; Don't coalesce with %esp if it would end up putting %esp in
+; the index position of an address, because that can't be
+; encoded on x86. It would actually be slightly better to
+; swap the address operands though, since there's no scale.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-mingw32"
+ %"struct.std::valarray<unsigned int>" = type { i32, i32* }
+
+define void @_ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_(i32 %__o, %"struct.std::valarray<unsigned int>"* nocapture %__l, %"struct.std::valarray<unsigned int>"* nocapture %__s, %"struct.std::valarray<unsigned int>"* nocapture %__i) nounwind {
+entry:
+ %0 = alloca i32, i32 undef, align 4 ; <i32*> [#uses=1]
+ br i1 undef, label %return, label %bb4
+
+bb4: ; preds = %bb7.backedge, %entry
+ %indvar = phi i32 [ %indvar.next, %bb7.backedge ], [ 0, %entry ] ; <i32> [#uses=2]
+ %scevgep24.sum = sub i32 undef, %indvar ; <i32> [#uses=2]
+ %scevgep25 = getelementptr i32* %0, i32 %scevgep24.sum ; <i32*> [#uses=1]
+ %scevgep27 = getelementptr i32* undef, i32 %scevgep24.sum ; <i32*> [#uses=1]
+ %1 = load i32* %scevgep27, align 4 ; <i32> [#uses=0]
+ br i1 undef, label %bb7.backedge, label %bb5
+
+bb5: ; preds = %bb4
+ store i32 0, i32* %scevgep25, align 4
+ br label %bb7.backedge
+
+bb7.backedge: ; preds = %bb5, %bb4
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br label %bb4
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 9939424..8aa0bfd 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
; PR1877
@NNTOT = weak global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index c67e0f5..5d10bba 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep paddw | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86-64 | grep paddw | count 2
+; RUN: llc < %s -march=x86-64 | not grep mov
; The 2-addr pass should ensure that identical code is produced for these functions
; no extra copy should be generated.
diff --git a/test/CodeGen/X86/coalescer-commute3.ll b/test/CodeGen/X86/coalescer-commute3.ll
index 7d4a80a..e5bd448 100644
--- a/test/CodeGen/X86/coalescer-commute3.ll
+++ b/test/CodeGen/X86/coalescer-commute3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
index 9628f93..02a9781 100644
--- a/test/CodeGen/X86/coalescer-commute4.ll
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
; PR1501
define float @foo(i32* %x, float* %y, i32 %c) nounwind {
diff --git a/test/CodeGen/X86/coalescer-commute5.ll b/test/CodeGen/X86/coalescer-commute5.ll
index c730ea7..510d115 100644
--- a/test/CodeGen/X86/coalescer-commute5.ll
+++ b/test/CodeGen/X86/coalescer-commute5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
define i32 @t() {
entry:
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
new file mode 100644
index 0000000..7d6f399
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | not grep movaps
+; rdar://6509240
+
+ type { %struct.TValue } ; type %0
+ type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1
+ %struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
+ %struct.GCObject = type { %struct.lua_State }
+ %struct.L_Umaxalign = type { double }
+ %struct.Mbuffer = type { i8*, i32, i32 }
+ %struct.Node = type { %struct.TValue, %struct.TKey }
+ %struct.TKey = type { %1 }
+ %struct.TString = type { %struct.anon }
+ %struct.TValue = type { %struct.L_Umaxalign, i32 }
+ %struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 }
+ %struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 }
+ %struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 }
+ %struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] }
+ %struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+ %struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 }
+ %struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 }
+ %struct.stringtable = type { %struct.GCObject**, i32, i32 }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp {
+entry:
+ %0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; <i32> [#uses=1]
+ %1 = uitofp i32 %0 to double ; <double> [#uses=1]
+ %2 = fdiv double %1, 1.000000e+06 ; <double> [#uses=1]
+ %3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3]
+ %4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2]
+ %5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ store double %2, double* %5, align 4
+ %6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 3, i32* %6, align 4
+ %7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1]
+ %8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1]
+ store %struct.TValue* %8, %struct.TValue** %3, align 4
+ ret i32 1
+}
+
+declare i32 @"\01_clock$UNIX2003"()
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index ab029f4..4db520f 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep xor | count 3
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xor | count 3
@val = internal global i64 0 ; <i64*> [#uses=1]
@"\01LC" = internal constant [7 x i8] c"0x%lx\0A\00" ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/code_placement.ll b/test/CodeGen/X86/code_placement.ll
index 5516795..9747183 100644
--- a/test/CodeGen/X86/code_placement.ll
+++ b/test/CodeGen/X86/code_placement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | %prcontext jmp 1 | grep align
+; RUN: llc -march=x86 < %s | FileCheck %s
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
@@ -12,6 +12,8 @@ entry:
%tmp15 = add i32 %r, -1 ; <i32> [#uses=1]
%tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2]
br label %bb
+; CHECK: jmp
+; CHECK-NEXT: align
bb: ; preds = %bb1, %entry
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3]
diff --git a/test/CodeGen/X86/codegen-prepare-cast.ll b/test/CodeGen/X86/codegen-prepare-cast.ll
index ae3eb5f..2a8ead8 100644
--- a/test/CodeGen/X86/codegen-prepare-cast.ll
+++ b/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR4297
target datalayout =
diff --git a/test/CodeGen/X86/codemodel.ll b/test/CodeGen/X86/codemodel.ll
new file mode 100644
index 0000000..b6ca1ce
--- /dev/null
+++ b/test/CodeGen/X86/codemodel.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -code-model=small | FileCheck -check-prefix CHECK-SMALL %s
+; RUN: llc < %s -code-model=kernel | FileCheck -check-prefix CHECK-KERNEL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@data = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK-SMALL: foo:
+; CHECK-SMALL: movl data(%rip), %eax
+; CHECK-KERNEL: foo:
+; CHECK-KERNEL: movl data, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo2() nounwind readonly {
+entry:
+; CHECK-SMALL: foo2:
+; CHECK-SMALL: movl data+40(%rip), %eax
+; CHECK-KERNEL: foo2:
+; CHECK-KERNEL: movl data+40, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo3() nounwind readonly {
+entry:
+; CHECK-SMALL: foo3:
+; CHECK-SMALL: movl data-40(%rip), %eax
+; CHECK-KERNEL: foo3:
+; CHECK-KERNEL: movq $-40, %rax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo4() nounwind readonly {
+entry:
+; FIXME: We really can use movabsl here!
+; CHECK-SMALL: foo4:
+; CHECK-SMALL: movl $16777216, %eax
+; CHECK-SMALL: movl data(%rax), %eax
+; CHECK-KERNEL: foo4:
+; CHECK-KERNEL: movl data+16777216, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+define i32 @foo1() nounwind readonly {
+entry:
+; CHECK-SMALL: foo1:
+; CHECK-SMALL: movl data+16777212(%rip), %eax
+; CHECK-KERNEL: foo1:
+; CHECK-KERNEL: movl data+16777212, %eax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
+define i32 @foo5() nounwind readonly {
+entry:
+; CHECK-SMALL: foo5:
+; CHECK-SMALL: movl data-16777216(%rip), %eax
+; CHECK-KERNEL: foo5:
+; CHECK-KERNEL: movq $-16777216, %rax
+ %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
diff --git a/test/CodeGen/X86/combine-lds.ll b/test/CodeGen/X86/combine-lds.ll
index a78a042..b49d081 100644
--- a/test/CodeGen/X86/combine-lds.ll
+++ b/test/CodeGen/X86/combine-lds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fldl | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fldl | count 1
define double @doload64(i64 %x) nounwind {
%tmp717 = bitcast i64 %x to double
diff --git a/test/CodeGen/X86/combiner-aa-0.ll b/test/CodeGen/X86/combiner-aa-0.ll
new file mode 100644
index 0000000..a61ef7a
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-0.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 -combiner-global-alias-analysis -combiner-alias-analysis
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+ %struct.Hash_Key = type { [4 x i32], i32 }
+@g_flipV_hashkey = external global %struct.Hash_Key, align 16 ; <%struct.Hash_Key*> [#uses=1]
+
+define void @foo() nounwind {
+ %t0 = load i32* undef, align 16 ; <i32> [#uses=1]
+ %t1 = load i32* null, align 4 ; <i32> [#uses=1]
+ %t2 = srem i32 %t0, 32 ; <i32> [#uses=1]
+ %t3 = shl i32 1, %t2 ; <i32> [#uses=1]
+ %t4 = xor i32 %t3, %t1 ; <i32> [#uses=1]
+ store i32 %t4, i32* null, align 4
+ %t5 = getelementptr %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0 ; <i32*> [#uses=2]
+ %t6 = load i32* %t5, align 4 ; <i32> [#uses=1]
+ %t7 = shl i32 1, undef ; <i32> [#uses=1]
+ %t8 = xor i32 %t7, %t6 ; <i32> [#uses=1]
+ store i32 %t8, i32* %t5, align 4
+ unreachable
+}
diff --git a/test/CodeGen/X86/combiner-aa-1.ll b/test/CodeGen/X86/combiner-aa-1.ll
new file mode 100644
index 0000000..58a7129
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s --combiner-alias-analysis --combiner-global-alias-analysis
+; PR4880
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+%struct.alst_node = type { %struct.node }
+%struct.arg_node = type { %struct.node, i8*, %struct.alst_node* }
+%struct.arglst_node = type { %struct.alst_node, %struct.arg_node*, %struct.arglst_node* }
+%struct.lam_node = type { %struct.alst_node, %struct.arg_node*, %struct.alst_node* }
+%struct.node = type { i32 (...)**, %struct.node* }
+
+define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
+comb_entry:
+ %.SV59 = alloca %struct.node* ; <%struct.node**> [#uses=1]
+ %0 = load i32 (...)*** null, align 4 ; <i32 (...)**> [#uses=1]
+ %1 = getelementptr inbounds i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
+ %2 = load i32 (...)** %1, align 4 ; <i32 (...)*> [#uses=1]
+ store %struct.node* undef, %struct.node** %.SV59
+ %3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
+ %4 = tail call i32 %3(%struct.node* undef) ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
index 12c0e03..d810cb1 100644
--- a/test/CodeGen/X86/commute-intrinsic.ll
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
@a = external global <2 x i64> ; <<2 x i64>*> [#uses=1]
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 224f5d5..56ea26b 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,7 +2,7 @@
; insertion of register-register copies.
; Make sure there are only 3 mov's for each testcase
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {\\\<mov\\\>} | count 6
diff --git a/test/CodeGen/X86/compare-add.ll b/test/CodeGen/X86/compare-add.ll
index aa69a31..358ee59 100644
--- a/test/CodeGen/X86/compare-add.ll
+++ b/test/CodeGen/X86/compare-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep add
+; RUN: llc < %s -march=x86 | not grep add
define i1 @X(i32 %X) {
%Y = add i32 %X, 14 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
new file mode 100644
index 0000000..2be90c9
--- /dev/null
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
+; and negative infinity, because those are more efficient on x86.
+
+; CHECK: oeq_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp oeq float %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: oeq_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp oeq double %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
+
+; CHECK: une_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp une float %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: une_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp une double %x, 0x7FF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
+
+; CHECK: oeq_neg_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp oeq float %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: oeq_neg_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp oeq double %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
+
+; CHECK: une_neg_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_neg_inff(float %x, float %y) nounwind readonly {
+ %t0 = fcmp une float %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, float 1.0, float %y
+ ret float %t1
+}
+
+; CHECK: une_neg_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_neg_inf(double %x, double %y) nounwind readonly {
+ %t0 = fcmp une double %x, 0xFFF0000000000000
+ %t1 = select i1 %t0, double 1.0, double %y
+ ret double %t1
+}
diff --git a/test/CodeGen/X86/compare_folding.ll b/test/CodeGen/X86/compare_folding.ll
index c6cda4a..84c152d 100644
--- a/test/CodeGen/X86/compare_folding.ll
+++ b/test/CodeGen/X86/compare_folding.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | \
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
; RUN: grep movsd | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | \
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
; RUN: grep ucomisd
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
new file mode 100644
index 0000000..be8de5e
--- /dev/null
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1
+; We should have a .no_dead_strip directive for Z but not for X/Y.
+
+@X = internal global i8 4
+@Y = internal global i32 123
+@Z = internal global i8 4
+
+@llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
+@llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 05adb50..7e7acaa 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep mov | count 2
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
entry:
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
index 6e3156b..ca8cc14 100644
--- a/test/CodeGen/X86/const-select.ll
+++ b/test/CodeGen/X86/const-select.ll
@@ -2,7 +2,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
-; RUN: llvm-as < %s | llc | grep {LCPI1_0(,%eax,4)}
+; RUN: llc < %s | grep {LCPI1_0(,%eax,4)}
define float @f(i32 %x) nounwind readnone {
entry:
%0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
@@ -10,7 +10,7 @@ entry:
ret float %iftmp.0.0
}
-; RUN: llvm-as < %s | llc | grep {movsbl.*(%e.x,%e.x,4), %eax}
+; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
entry:
%0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 80be854..05388f9 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep LCPI | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
+; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12
declare float @qux(float %y)
diff --git a/test/CodeGen/X86/constpool.ll b/test/CodeGen/X86/constpool.ll
index 60d51e5..2aac486 100644
--- a/test/CodeGen/X86/constpool.ll
+++ b/test/CodeGen/X86/constpool.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -fast-isel
-; RUN: llvm-as < %s | llc -march=x86-64
-; RUN: llvm-as < %s | llc -fast-isel -march=x86-64
+; RUN: llc < %s
+; RUN: llc < %s -fast-isel
+; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -fast-isel -march=x86-64
; PR4466
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 579e30c..2b4b832 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -o %t -f -stats -info-output-file - | \
+; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
; RUN: grep {asm-printer} | grep {Number of machine instrs printed} | grep 5
; RUN: grep {leal 1(\%rsi),} %t
diff --git a/test/CodeGen/X86/copysign-zero.ll b/test/CodeGen/X86/copysign-zero.ll
index a08fa65..47522d8 100644
--- a/test/CodeGen/X86/copysign-zero.ll
+++ b/test/CodeGen/X86/copysign-zero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | not grep orpd
-; RUN: llvm-as < %s | llc | grep andpd | count 1
+; RUN: llc < %s | not grep orpd
+; RUN: llc < %s | grep andpd | count 1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
index 4539ef6..4fe554d 100644
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ b/test/CodeGen/X86/critical-edge-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
+; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
%CC = type { %Register }
%II = type { %"struct.XX::II::$_74" }
diff --git a/test/CodeGen/X86/cstring.ll b/test/CodeGen/X86/cstring.ll
index 27d6181..5b5a766 100644
--- a/test/CodeGen/X86/cstring.ll
+++ b/test/CodeGen/X86/cstring.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep comm
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep comm
; rdar://6479858
@str1 = internal constant [1 x i8] zeroinitializer
diff --git a/test/CodeGen/X86/dag-rauw-cse.ll b/test/CodeGen/X86/dag-rauw-cse.ll
index ba84711..edcfeb7 100644
--- a/test/CodeGen/X86/dag-rauw-cse.ll
+++ b/test/CodeGen/X86/dag-rauw-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {orl \$1}
+; RUN: llc < %s -march=x86 | grep {orl \$1}
; PR3018
define i32 @test(i32 %A) nounwind {
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index b96fdfc..c0ee2ac 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
; RUN: grep unpcklpd %t | count 1
; RUN: grep movapd %t | count 1
; RUN: grep movaps %t | count 1
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index a673ebf..c3c7990 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
entry:
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index c292140..a3c1e6f 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep __bzero
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
declare void @llvm.memset.i32(i8*, i8, i32, i32)
diff --git a/test/CodeGen/X86/darwin-no-dead-strip.ll b/test/CodeGen/X86/darwin-no-dead-strip.ll
index 63325b7..452d1f8 100644
--- a/test/CodeGen/X86/darwin-no-dead-strip.ll
+++ b/test/CodeGen/X86/darwin-no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep no_dead_strip
+; RUN: llc < %s | grep no_dead_strip
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin8.7.2"
diff --git a/test/CodeGen/X86/darwin-quote.ll b/test/CodeGen/X86/darwin-quote.ll
new file mode 100644
index 0000000..8fddc11
--- /dev/null
+++ b/test/CodeGen/X86/darwin-quote.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+
+define internal i64 @baz() nounwind {
+ %tmp = load i64* @"+x"
+ ret i64 %tmp
+; CHECK: _baz:
+; CHECK: movl "L_+x$non_lazy_ptr", %ecx
+}
+
+
+@"+x" = external global i64
+
+; CHECK: "L_+x$non_lazy_ptr":
+; CHECK: .indirect_symbol "_+x"
diff --git a/test/CodeGen/X86/darwin-stub.ll b/test/CodeGen/X86/darwin-stub.ll
index 79eb31a..b4d2e1a 100644
--- a/test/CodeGen/X86/darwin-stub.ll
+++ b/test/CodeGen/X86/darwin-stub.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stub
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | not grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | not grep stub
@"\01LC" = internal constant [13 x i8] c"Hello World!\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
index aa690f7..f0ada41 100644
--- a/test/CodeGen/X86/div_const.ll
+++ b/test/CodeGen/X86/div_const.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 365384439
+; RUN: llc < %s -march=x86 | grep 365384439
define i32 @f9188_mul365384439_shift27(i32 %A) {
%tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/divrem.ll b/test/CodeGen/X86/divrem.ll
index a611edd..e86b52f 100644
--- a/test/CodeGen/X86/divrem.ll
+++ b/test/CodeGen/X86/divrem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep div | count 8
define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) {
%r = sdiv i64 %x, %y
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
new file mode 100644
index 0000000..c634c7e
--- /dev/null
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s
+
+declare dllimport void @foo()
+
+define void @bar() nounwind {
+; CHECK: call *__imp__foo
+ call void @foo()
+ ret void
+}
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 885700e..3b26319 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -1,12 +1,13 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$bar)} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$qux)} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$hen)} | count 1
+; RUN: llc < %s -march=x86 -mtriple=i386-linux | FileCheck %s
; PR1339
@"$bar" = global i32 zeroinitializer
@"$qux" = external global i32
define i32 @"$foo"() nounwind {
+; CHECK: movl ($bar),
+; CHECK: addl ($qux),
+; CHECK: call ($hen)
%m = load i32* @"$bar"
%n = load i32* @"$qux"
%t = add i32 %m, %n
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index 049a32c..1df0920 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 | not egrep {\\\$4294967289|-7\\(}
-; RUN: llvm-as < %s | llc -march=x86 | egrep {\\\$4294967280|-16\\(}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {\\-16}
+; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -march=x86-64 | grep {\\-16}
-define void @t() {
+define void @t() nounwind {
A:
br label %entry
diff --git a/test/CodeGen/X86/empty-struct-return-type.ll b/test/CodeGen/X86/empty-struct-return-type.ll
new file mode 100644
index 0000000..34cd5d9
--- /dev/null
+++ b/test/CodeGen/X86/empty-struct-return-type.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep call
+; PR4688
+
+; Return types can be empty structs, which can be awkward.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_ZN15QtSharedPointer22internalSafetyCheckAddEPVKv(i8* %ptr) {
+entry:
+ %0 = call { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** undef) ; <{ }> [#uses=0]
+ ret void
+}
+
+declare hidden { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** nocapture) nounwind
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 5a378e1..52dcb61 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl %ebp}
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | grep {movl %ebp}
declare void @bar(<2 x i64>* %n)
diff --git a/test/CodeGen/X86/extend.ll b/test/CodeGen/X86/extend.ll
index a54b6f1..9553b1b 100644
--- a/test/CodeGen/X86/extend.ll
+++ b/test/CodeGen/X86/extend.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
@G1 = internal global i8 0 ; <i8*> [#uses=1]
@G2 = internal global i8 0 ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/extern_weak.ll b/test/CodeGen/X86/extern_weak.ll
index 0cc5630..01e32aa 100644
--- a/test/CodeGen/X86/extern_weak.ll
+++ b/test/CodeGen/X86/extern_weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep weak_reference | count 2
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep weak_reference | count 2
@Y = global i32 (i8*)* @X ; <i32 (i8*)**> [#uses=0]
diff --git a/test/CodeGen/X86/extmul128.ll b/test/CodeGen/X86/extmul128.ll
index df48765..9b59829 100644
--- a/test/CodeGen/X86/extmul128.ll
+++ b/test/CodeGen/X86/extmul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mul | count 2
+; RUN: llc < %s -march=x86-64 | grep mul | count 2
define i128 @i64_sext_i128(i64 %a, i64 %b) {
%aa = sext i64 %a to i128
diff --git a/test/CodeGen/X86/extmul64.ll b/test/CodeGen/X86/extmul64.ll
index 635da48..9e20ded 100644
--- a/test/CodeGen/X86/extmul64.ll
+++ b/test/CodeGen/X86/extmul64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 2
+; RUN: llc < %s -march=x86 | grep mul | count 2
define i64 @i32_sext_i64(i32 %a, i32 %b) {
%aa = sext i32 %a to i64
diff --git a/test/CodeGen/X86/extract-combine.ll b/test/CodeGen/X86/extract-combine.ll
index 842ec24..2040e87 100644
--- a/test/CodeGen/X86/extract-combine.ll
+++ b/test/CodeGen/X86/extract-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -o %t
; RUN: not grep unpcklps %t
define i32 @foo() nounwind {
diff --git a/test/CodeGen/X86/extract-extract.ll b/test/CodeGen/X86/extract-extract.ll
new file mode 100644
index 0000000..ad79ab9
--- /dev/null
+++ b/test/CodeGen/X86/extract-extract.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 >/dev/null
+; PR4699
+
+; Handle this extractvalue-of-extractvalue case without getting in
+; trouble with CSE in DAGCombine.
+
+ %cc = type { %crd }
+ %cr = type { i32 }
+ %crd = type { i64, %cr* }
+ %pp = type { %cc }
+
+define fastcc void @foo(%pp* nocapture byval %p_arg) {
+entry:
+ %tmp2 = getelementptr %pp* %p_arg, i64 0, i32 0 ; <%cc*> [#uses=1]
+ %tmp3 = load %cc* %tmp2 ; <%cc> [#uses=1]
+ %tmp34 = extractvalue %cc %tmp3, 0 ; <%crd> [#uses=1]
+ %tmp345 = extractvalue %crd %tmp34, 0 ; <i64> [#uses=1]
+ %.ptr.i = load %cr** undef ; <%cr*> [#uses=0]
+ %tmp15.i = shl i64 %tmp345, 3 ; <i64> [#uses=0]
+ store %cr* undef, %cr** undef
+ ret void
+}
+
+
diff --git a/test/CodeGen/X86/extractelement-from-arg.ll b/test/CodeGen/X86/extractelement-from-arg.ll
index 44704b6..4ea37f0 100644
--- a/test/CodeGen/X86/extractelement-from-arg.ll
+++ b/test/CodeGen/X86/extractelement-from-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
define void @test(float* %R, <4 x float> %X) nounwind {
%tmp = extractelement <4 x float> %X, i32 3
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index 601690e..ee57d9b 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as %s -o - | llc -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
-; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
define i32 @t(<2 x i64>* %val) nounwind {
%tmp2 = load <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll
index b00c8e4..12a2ef3 100644
--- a/test/CodeGen/X86/extractelement-shuffle.ll
+++ b/test/CodeGen/X86/extractelement-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Examples that exhibits a bug in DAGCombine. The case is triggered by the
; following program. The bug is DAGCombine assumes that the bit convert
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
index 484d2c4..14778f0 100644
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn > %t
+; RUN: llc < %s -march=x86 -mcpu=penryn > %t
; RUN: not grep movd %t
; RUN: grep {movss %xmm} %t | count 1
; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 7ac8e04..54947c3 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,8 +1,7 @@
; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
; RUN: count 2
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
; RUN: grep fabs\$ | count 3
declare float @fabsf(float)
diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll
index 941f708..5e88ed7 100644
--- a/test/CodeGen/X86/fast-cc-callee-pops.ll
+++ b/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret 20}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret 20}
; Check that a fastcc function pops its stack variables before returning.
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index 3f3aa46..e151821 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {add ESP, 8}
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
index c8621a7..fe96c0c 100644
--- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {mov EDX, 1}
; check that fastcc is passing stuff in regs.
diff --git a/test/CodeGen/X86/fast-isel-bail.ll b/test/CodeGen/X86/fast-isel-bail.ll
index fb4f37e..9072c5c 100644
--- a/test/CodeGen/X86/fast-isel-bail.ll
+++ b/test/CodeGen/X86/fast-isel-bail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0
+; RUN: llc < %s -march=x86 -O0
; This file is for regression tests for cases where FastISel needs
; to gracefully bail out and let SelectionDAGISel take over.
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
new file mode 100644
index 0000000..f2696ce
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx | FileCheck %s
+; PR4684
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.8"
+
+declare void @func2(<1 x i64>)
+
+define void @func1() nounwind {
+
+; This isn't spectacular, but it's MMX code at -O0...
+; CHECK: movl $2, %eax
+; CHECK: movd %rax, %mm0
+; CHECK: movd %mm0, %rdi
+
+ call void @func2(<1 x i64> <i64 2>)
+ ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 9945746..5fcdbbb 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -march=x86 | grep and
+; RUN: llc < %s -fast-isel -march=x86 | grep and
define i32 @t() nounwind {
tak:
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index ac2595a..84d10f3 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel | grep {LCPI1_0(%rip)}
+; RUN: llc < %s -fast-isel | grep {LCPI1_0(%rip)}
; Make sure fast isel uses rip-relative addressing when required.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
new file mode 100644
index 0000000..5ffd48b
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64 | FileCheck %s
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+
+; CHECK: doo:
+; CHECK: xor
+define double @doo(double %x) nounwind {
+ %y = fsub double -0.0, %x
+ ret double %y
+}
+
+; CHECK: foo:
+; CHECK: xor
+define float @foo(float %x) nounwind {
+ %y = fsub float -0.0, %x
+ ret float %y
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
new file mode 100644
index 0000000..5b8acec
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32
+
+; GEP indices are interpreted as signed integers, so they
+; should be sign-extended to 64 bits on 64-bit targets.
+; PR3181
+define i32 @test1(i32 %t3, i32* %t1) nounwind {
+ %t9 = getelementptr i32* %t1, i32 %t3 ; <i32*> [#uses=1]
+ %t15 = load i32* %t9 ; <i32> [#uses=1]
+ ret i32 %t15
+; X32: test1:
+; X32: movl (%ecx,%eax,4), %eax
+; X32: ret
+
+; X64: test1:
+; X64: movslq %edi, %rax
+; X64: movl (%rsi,%rax,4), %eax
+; X64: ret
+
+}
+define i32 @test2(i64 %t3, i32* %t1) nounwind {
+ %t9 = getelementptr i32* %t1, i64 %t3 ; <i32*> [#uses=1]
+ %t15 = load i32* %t9 ; <i32> [#uses=1]
+ ret i32 %t15
+; X32: test2:
+; X32: movl (%eax,%ecx,4), %eax
+; X32: ret
+
+; X64: test2:
+; X64: movl (%rsi,%rdi,4), %eax
+; X64: ret
+}
+
+
+
+; PR4984
+define i8 @test3(i8* %start) nounwind {
+entry:
+ %A = getelementptr i8* %start, i64 -2 ; <i8*> [#uses=1]
+ %B = load i8* %A, align 1 ; <i8> [#uses=1]
+ ret i8 %B
+
+
+; X32: test3:
+; X32: movl 4(%esp), %eax
+; X32: movb -2(%eax), %al
+; X32: ret
+
+; X64: test3:
+; X64: movb -2(%rdi), %al
+; X64: ret
+
+}
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index b2f8850..34f8b38 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel | grep {_kill@GOTPCREL(%rip)}
+; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10.0"
@f = global i8 (...)* @kill ; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index e1ff792..d066578 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -fast-isel | grep {andb \$1, %}
+; RUN: llc < %s -march=x86 -fast-isel | grep {andb \$1, %}
declare i64 @bar(i64)
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index dfee4f2..35ec1e7 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -fast-isel -mtriple=i386-apple-darwin | \
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
; RUN: grep lazy_ptr, | count 2
-; RUN: llvm-as < %s | llc -fast-isel -march=x86 -relocation-model=static | \
+; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
; RUN: grep lea
@src = external global i32
diff --git a/test/CodeGen/X86/fast-isel-phys.ll b/test/CodeGen/X86/fast-isel-phys.ll
index 91dcca5..158ef55 100644
--- a/test/CodeGen/X86/fast-isel-phys.ll
+++ b/test/CodeGen/X86/fast-isel-phys.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -fast-isel-abort -march=x86
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86
define i8 @t2(i8 %a, i8 %c) nounwind {
%tmp = shl i8 %a, %c
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
index 7d8c9f5..35f7a72 100644
--- a/test/CodeGen/X86/fast-isel-shift-imm.ll
+++ b/test/CodeGen/X86/fast-isel-shift-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {sarl \$80, %eax}
+; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax}
; PR3242
define i32 @foo(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index 6f4d202..c3e527c 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -tailcallopt -march=x86 | not grep add
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
; PR4154
; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index 4dd14e6..a5e6642 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
+; RUN: llc < %s -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
; PR3654
@v = thread_local global i32 0
diff --git a/test/CodeGen/X86/fast-isel-trunc.ll b/test/CodeGen/X86/fast-isel-trunc.ll
index 039f114..69b26c5 100644
--- a/test/CodeGen/X86/fast-isel-trunc.ll
+++ b/test/CodeGen/X86/fast-isel-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -fast-isel -fast-isel-abort
-; RUN: llvm-as < %s | llc -march=x86-64 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort
define i8 @t1(i32 %x) signext nounwind {
%tmp1 = trunc i32 %x to i8
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index a9a016b..3dcd736 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
; This tests very minimal fast-isel functionality.
@@ -64,3 +64,12 @@ define i8* @inttoptr_i32(i32 %p) nounwind {
%t = inttoptr i32 %p to i8*
ret i8* %t
}
+
+define void @store_i1(i1* %p, i1 %t) nounwind {
+ store i1 %t, i1* %p
+ ret void
+}
+define i1 @load_i1(i1* %p) nounwind {
+ %t = load i1* %p
+ ret i1 %t
+}
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index d2db279..2b48f5f 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=mingw32 | \
+; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \
; RUN: grep {@12}
; Check that a fastcall function gets correct mangling
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
index 40c753e..d044a2a 100644
--- a/test/CodeGen/X86/fastcc-2.ll
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
define i32 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index 94da505..52b3e57 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
+; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
; PR3122
; rdar://6400815
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index 7fc111b..d457418 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt=false | grep ret | not grep 4
+; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
%struct.foo = type { [4 x i32] }
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
index f18f34d..d538264 100644
--- a/test/CodeGen/X86/fastcc.ll
+++ b/test/CodeGen/X86/fastcc.ll
@@ -1,5 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | grep ecx | grep 0
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | grep xmm0 | grep 8
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+; CHECK: movsd %xmm0, 8(%esp)
+; CHECK: xorl %ecx, %ecx
@d = external global double ; <double*> [#uses=1]
@c = external global double ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
index c4f9587..6020530 100644
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
+; RUN: llc < %s -march=x86 | grep sar | count 1
+; RUN: llc < %s -march=x86-64 | not grep sar
define i32 @test(i32 %f12) {
%tmp7.25 = lshr i32 %f12, 16
diff --git a/test/CodeGen/X86/fildll.ll b/test/CodeGen/X86/fildll.ll
index 65944fd..c5a3765 100644
--- a/test/CodeGen/X86/fildll.ll
+++ b/test/CodeGen/X86/fildll.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
define fastcc double @sint64_to_fp(i64 %X) {
%R = sitofp i64 %X to double ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
index 73aa713..03bad65 100644
--- a/test/CodeGen/X86/fmul-zero.ll
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -enable-unsafe-fp-math | not grep mulps
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mulps
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | not grep mulps
+; RUN: llc < %s -march=x86-64 | grep mulps
define void @test14(<4 x float>*) nounwind {
load <4 x float>* %0, align 1
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
index 2828ad2..5e80ea5 100644
--- a/test/CodeGen/X86/fold-add.ll
+++ b/test/CodeGen/X86/fold-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {cmpb \$0, (%r.\*,%r.\*)}
+; RUN: llc < %s -march=x86-64 | grep {cmpb \$0, (%r.\*,%r.\*)}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.6"
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 705b795..9f79f77 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
define i32 @t1(i8* %X, i32 %i) {
entry:
diff --git a/test/CodeGen/X86/fold-call-2.ll b/test/CodeGen/X86/fold-call-2.ll
index 349f986..7a2b038 100644
--- a/test/CodeGen/X86/fold-call-2.ll
+++ b/test/CodeGen/X86/fold-call-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 1
@f = external global void ()* ; <void ()**> [#uses=1]
diff --git a/test/CodeGen/X86/fold-call-3.ll b/test/CodeGen/X86/fold-call-3.ll
index 824ae00..337a7ed 100644
--- a/test/CodeGen/X86/fold-call-3.ll
+++ b/test/CodeGen/X86/fold-call-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep call | grep 560
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep call | grep 560
; rdar://6522427
%"struct.clang::Action" = type { %"struct.clang::ActionBase" }
diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll
index 5399171..603e9ad 100644
--- a/test/CodeGen/X86/fold-call.ll
+++ b/test/CodeGen/X86/fold-call.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
declare void @bar()
diff --git a/test/CodeGen/X86/fold-imm.ll b/test/CodeGen/X86/fold-imm.ll
index 1623f31..f1fcbcf 100644
--- a/test/CodeGen/X86/fold-imm.ll
+++ b/test/CodeGen/X86/fold-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep inc
-; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 4
+; RUN: llc < %s -march=x86 | grep inc
+; RUN: llc < %s -march=x86 | grep add | grep 4
define i32 @test(i32 %X) nounwind {
entry:
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 6e3da5c..eb182da 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
@stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
index 312427a..0351eca 100644
--- a/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
@B = external global [1000 x i8], align 32
@A = external global [1000 x i8], align 32
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index f558aca..ef5202f 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI1_2 | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI1_2 | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index e2141eb..cc4198d 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep pcmpeqd %t | count 1
; RUN: grep xor %t | count 1
; RUN: not grep LCP %t
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 2b75781..49f8795 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; This testcase should need to spill the -1 value on x86-32,
; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
index 1016b10..2605123 100644
--- a/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movslq | count 1
+; RUN: llc < %s -march=x86-64 | grep movslq | count 1
; PR4050
type { i64 } ; type %0
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index 32ba217..cafc61a 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,6 +1,6 @@
;; Test that this FP immediate is stored in the constant pool as a float.
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3 | \
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
; RUN: grep {.long.1123418112}
define double @D() {
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 15606c3..08ea77d 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
; These operations should be done in integer registers, eliminating constant
; pool loads, movd's etc.
diff --git a/test/CodeGen/X86/fp-stack-2results.ll b/test/CodeGen/X86/fp-stack-2results.ll
index f47fd74..321e267 100644
--- a/test/CodeGen/X86/fp-stack-2results.ll
+++ b/test/CodeGen/X86/fp-stack-2results.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fldz
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fld1
+; RUN: llc < %s -march=x86 | grep fldz
+; RUN: llc < %s -march=x86-64 | grep fld1
; This is basically this code on x86-64:
; _Complex long double test() { return 1.0; }
diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll
new file mode 100644
index 0000000..4768ea2
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -O0 -fast-isel -regalloc=local -o -
+; PR4767
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10"
+
+define void @fn(x86_fp80 %x) nounwind ssp {
+entry:
+ %x.addr = alloca x86_fp80 ; <x86_fp80*> [#uses=5]
+ store x86_fp80 %x, x86_fp80* %x.addr
+ br i1 false, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %tmp = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp1 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %cmp = fcmp oeq x86_fp80 %tmp, %tmp1 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
+
+cond.false: ; preds = %entry
+ %tmp2 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp3 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %cmp4 = fcmp une x86_fp80 %tmp2, %tmp3 ; <i1> [#uses=1]
+ br i1 %cmp4, label %if.then, label %if.end
+
+if.then: ; preds = %cond.false, %cond.true
+ br label %if.end
+
+if.end: ; preds = %if.then, %cond.false, %cond.true
+ ret void
+}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4e61d0f..4bdf459 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | \
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
; RUN: grep {fucomi.*st.\[12\]}
; PR1012
diff --git a/test/CodeGen/X86/fp-stack-direct-ret.ll b/test/CodeGen/X86/fp-stack-direct-ret.ll
index 78be2a3..5a28bb5 100644
--- a/test/CodeGen/X86/fp-stack-direct-ret.ll
+++ b/test/CodeGen/X86/fp-stack-direct-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep fstp
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep movsd
+; RUN: llc < %s -march=x86 | not grep fstp
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movsd
declare double @foo()
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index 5254e1c..f220b24 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah | grep cvtss2sd
-; RUN: llvm-as < %s | llc -mcpu=yonah | grep fstps
-; RUN: llvm-as < %s | llc -mcpu=yonah | not grep cvtsd2ss
+; RUN: llc < %s -mcpu=yonah | grep cvtss2sd
+; RUN: llc < %s -mcpu=yonah | grep fstps
+; RUN: llc < %s -mcpu=yonah | not grep cvtsd2ss
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/fp-stack-ret-store.ll b/test/CodeGen/X86/fp-stack-ret-store.ll
index 56392de..05dfc54 100644
--- a/test/CodeGen/X86/fp-stack-ret-store.ll
+++ b/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah | not grep movss
+; RUN: llc < %s -mcpu=yonah | not grep movss
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index 3e6ad54..c83a0cb 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
+; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
; RUN: grep fldl %t | count 1
; RUN: not grep xmm %t
; RUN: grep {sub.*esp} %t | count 1
diff --git a/test/CodeGen/X86/fp-stack-retcopy.ll b/test/CodeGen/X86/fp-stack-retcopy.ll
index 997f8df..67dcb18 100644
--- a/test/CodeGen/X86/fp-stack-retcopy.ll
+++ b/test/CodeGen/X86/fp-stack-retcopy.ll
@@ -1,5 +1,5 @@
; This should not copy the result of foo into an xmm register.
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
; rdar://5689903
declare double @foo()
diff --git a/test/CodeGen/X86/fp-stack-set-st1.ll b/test/CodeGen/X86/fp-stack-set-st1.ll
index 00a73ae..894897a 100644
--- a/test/CodeGen/X86/fp-stack-set-st1.ll
+++ b/test/CodeGen/X86/fp-stack-set-st1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fxch | count 2
+; RUN: llc < %s -march=x86 | grep fxch | count 2
define i32 @main() nounwind {
entry:
diff --git a/test/CodeGen/X86/fp2sint.ll b/test/CodeGen/X86/fp2sint.ll
index 80f7efb..1675444 100644
--- a/test/CodeGen/X86/fp2sint.ll
+++ b/test/CodeGen/X86/fp2sint.ll
@@ -1,6 +1,6 @@
;; LowerFP_TO_SINT should not create a stack object if it's not needed.
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep add
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep add
define i32 @main(i32 %argc, i8** %argv) {
cond_false.i.i.i: ; preds = %bb.i5
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
index f2017b9..8e823ed 100644
--- a/test/CodeGen/X86/fp_constant_op.ll
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \
; RUN: grep {fadd\\|fsub\\|fdiv\\|fmul} | not grep -i ST
; Test that the load of the constant is folded into the operation.
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
index 5452326..a160ac6 100644
--- a/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fild | not grep ESP
+; RUN: llc < %s -march=x86 | grep fild | not grep ESP
define double @short(i16* %P) {
%V = load i16* %P ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 655ad3d..0145069 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul}
; Test that the load of the memory location is folded into the operation.
diff --git a/test/CodeGen/X86/fsxor-alignment.ll b/test/CodeGen/X86/fsxor-alignment.ll
index 4d25fca..6a8dbcf 100644
--- a/test/CodeGen/X86/fsxor-alignment.ll
+++ b/test/CodeGen/X86/fsxor-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
; RUN: grep -v sp | grep xorps | count 2
; Don't fold the incoming stack arguments into the xorps instructions used
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 4a85779..68575bc 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-full-lsr >%t
+; RUN: llc < %s -march=x86 -enable-full-lsr >%t
; RUN: grep {addl \\\$4,} %t | count 3
; RUN: not grep {,%} %t
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
index aaa2f84..9f6d3f7 100644
--- a/test/CodeGen/X86/ga-offset.ll
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: not grep lea %t
; RUN: not grep add %t
; RUN: grep mov %t | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static > %t
; RUN: not grep lea %t
; RUN: not grep add %t
; RUN: grep mov %t | count 1
diff --git a/test/CodeGen/X86/global-sections-tls.ll b/test/CodeGen/X86/global-sections-tls.ll
new file mode 100644
index 0000000..2c23030
--- /dev/null
+++ b/test/CodeGen/X86/global-sections-tls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+; PR4639
+@G1 = internal thread_local global i32 0 ; <i32*> [#uses=1]
+; LINUX: .section .tbss,"awT",@nobits
+; LINUX: G1:
+
+
+define i32* @foo() nounwind readnone {
+entry:
+ ret i32* @G1
+}
+
+
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
new file mode 100644
index 0000000..38948a7
--- /dev/null
+++ b/test/CodeGen/X86/global-sections.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+
+
+; int G1;
+@G1 = common global i32 0
+
+; LINUX: .type G1,@object
+; LINUX: .section .gnu.linkonce.b.G1,"aw",@nobits
+; LINUX: .comm G1,4,4
+
+; DARWIN: .comm _G1,4,2
+
+
+
+
+; const int G2 __attribute__((weak)) = 42;
+@G2 = weak_odr constant i32 42
+
+
+; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
+
+; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: _G2:
+; DARWIN: .long 42
+
+
+; int * const G3 = &G1;
+@G3 = constant i32* @G1
+
+; DARWIN: .section __DATA,__const
+; DARWIN: .globl _G3
+; DARWIN: _G3:
+; DARWIN: .long _G1
+
+
+; _Complex long long const G4 = 34;
+@G4 = constant {i64,i64} { i64 34, i64 0 }
+
+; DARWIN: .section __TEXT,__const
+; DARWIN: _G4:
+; DARWIN: .long 34
+
+
+; int G5 = 47;
+@G5 = global i32 47
+
+; LINUX: .data
+; LINUX: .globl G5
+; LINUX: G5:
+; LINUX: .long 47
+
+; DARWIN: .section __DATA,__data
+; DARWIN: .globl _G5
+; DARWIN: _G5:
+; DARWIN: .long 47
+
+
+; PR4584
+@"foo bar" = linkonce global i32 42
+
+; LINUX: .type foo_20_bar,@object
+; LINUX:.section .gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .weak foo_20_bar
+; LINUX: foo_20_bar:
+
+; DARWIN: .section __DATA,__datacoal_nt,coalesced
+; DARWIN: .globl "_foo bar"
+; DARWIN: .weak_definition "_foo bar"
+; DARWIN: "_foo bar":
+
+; PR4650
+@G6 = weak_odr constant [1 x i8] c"\01"
+
+; LINUX: .type G6,@object
+; LINUX: .section .gnu.linkonce.r.G6,"a",@progbits
+; LINUX: .weak G6
+; LINUX: G6:
+; LINUX: .ascii "\001"
+; LINUX: .size G6, 1
+
+; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: .globl _G6
+; DARWIN: .weak_definition _G6
+; DARWIN:_G6:
+; DARWIN: .ascii "\001"
+
+
+@G7 = constant [10 x i8] c"abcdefghi\00"
+
+; DARWIN: __TEXT,__cstring,cstring_literals
+; DARWIN: .globl _G7
+; DARWIN: _G7:
+; DARWIN: .asciz "abcdefghi"
+
+; LINUX: .section .rodata.str1.1,"aMS",@progbits,1
+; LINUX: .globl G7
+; LINUX: G7:
+; LINUX: .asciz "abcdefghi"
+
+
+@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+
+; DARWIN: .section __TEXT,__ustring
+; DARWIN: .globl _G8
+; DARWIN: _G8:
+
+; LINUX: .section .rodata.str2.2,"aMS",@progbits,2
+; LINUX: .globl G8
+; LINUX:G8:
+
+@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+
+; DARWIN: .section __TEXT,__const
+; DARWIN: .globl _G9
+; DARWIN: _G9:
+
+; LINUX: .section .rodata.str4.4,"aMS",@progbits,4
+; LINUX: .globl G9
+; LINUX:G9
+
+
+
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 41d9128..76ffd66 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
; Use h-register extract and zero-extend.
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index b38e0e4..98817f3 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
; Use h-register extract and zero-extend.
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
index e867242..d30e6b3 100644
--- a/test/CodeGen/X86/h-register-store.ll
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep mov %t | count 6
; RUN: grep {movb %ah, (%rsi)} %t | count 3
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep mov %t | count 3
; RUN: grep {movb %ah, (%e} %t | count 3
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index 2777be9..878fd93 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {incb %ah} %t | count 3
; RUN: grep {movzbl %ah,} %t | count 3
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 789f3dd..e97ebab 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep {movzbl %\[abcd\]h,} %t | count 8
; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
index 5541583..16e13f8 100644
--- a/test/CodeGen/X86/h-registers-2.ll
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {movzbl %\[abcd\]h,} %t | count 1
; RUN: grep {shll \$3,} %t | count 1
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
index d353a22..8a0b07b 100644
--- a/test/CodeGen/X86/h-registers-3.ll
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
define zeroext i8 @foo() nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/hidden-vis-2.ll b/test/CodeGen/X86/hidden-vis-2.ll
index e000547..74554d1 100644
--- a/test/CodeGen/X86/hidden-vis-2.ll
+++ b/test/CodeGen/X86/hidden-vis-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep mov | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 | not grep GOT
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep mov | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | not grep GOT
@x = weak hidden global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/hidden-vis-3.ll b/test/CodeGen/X86/hidden-vis-3.ll
index 81dc76e..4be881e 100644
--- a/test/CodeGen/X86/hidden-vis-3.ll
+++ b/test/CodeGen/X86/hidden-vis-3.ll
@@ -1,13 +1,17 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep mov | count 3
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep long | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 | not grep GOT
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
@x = external hidden global i32 ; <i32*> [#uses=1]
@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; X32: _t:
+; X32: movl _y, %eax
+
+; X64: _t:
+; X64: movl _y(%rip), %eax
+
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
%1 = load i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/hidden-vis-4.ll b/test/CodeGen/X86/hidden-vis-4.ll
index e6936de..a8aede5 100644
--- a/test/CodeGen/X86/hidden-vis-4.ll
+++ b/test/CodeGen/X86/hidden-vis-4.ll
@@ -1,11 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep long
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep comm
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s
@x = common hidden global i32 0 ; <i32*> [#uses=1]
define i32 @t() nounwind readonly {
entry:
+; CHECK: t:
+; CHECK: movl _x, %eax
+; CHECK: .comm _x,4
%0 = load i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/hidden-vis.ll b/test/CodeGen/X86/hidden-vis.ll
index 058850c..a948bdf 100644
--- a/test/CodeGen/X86/hidden-vis.ll
+++ b/test/CodeGen/X86/hidden-vis.ll
@@ -1,20 +1,24 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | \
-; RUN: grep .hidden | count 2
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin8.8.0 | \
-; RUN: grep .private_extern | count 2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN
-%struct.Person = type { i32 }
@a = hidden global i32 0
@b = external global i32
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
-define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) {
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
ret void
}
-declare void @function(i32)
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
-define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) {
+; DARWIN: t2:
+; DARWIN: .private_extern _a
ret void
}
diff --git a/test/CodeGen/X86/i128-and-beyond.ll b/test/CodeGen/X86/i128-and-beyond.ll
index db94b0e..907a6b8 100644
--- a/test/CodeGen/X86/i128-and-beyond.ll
+++ b/test/CodeGen/X86/i128-and-beyond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep 18446744073709551615 | count 14
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep 18446744073709551615 | count 14
; These static initializers are too big to hand off to assemblers
; as monolithic blobs.
diff --git a/test/CodeGen/X86/i128-immediate.ll b/test/CodeGen/X86/i128-immediate.ll
index 6939933..c47569e 100644
--- a/test/CodeGen/X86/i128-immediate.ll
+++ b/test/CodeGen/X86/i128-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | count 2
+; RUN: llc < %s -march=x86-64 | grep movq | count 2
define i128 @__addvti3() {
ret i128 -1
diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll
index f8c732e..e9d30d6 100644
--- a/test/CodeGen/X86/i128-mul.ll
+++ b/test/CodeGen/X86/i128-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR1198
define i64 @foo(i64 %x, i64 %y) {
diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll
index 179a013..277f428 100644
--- a/test/CodeGen/X86/i128-ret.ll
+++ b/test/CodeGen/X86/i128-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq 8(%rdi), %rdx}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq (%rdi), %rax}
+; RUN: llc < %s -march=x86-64 | grep {movq 8(%rdi), %rdx}
+; RUN: llc < %s -march=x86-64 | grep {movq (%rdi), %rax}
define i128 @test(i128 *%P) {
%A = load i128* %P
diff --git a/test/CodeGen/X86/i256-add.ll b/test/CodeGen/X86/i256-add.ll
index 280ed6b..5a7a7a7 100644
--- a/test/CodeGen/X86/i256-add.ll
+++ b/test/CodeGen/X86/i256-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep adcl %t | count 7
; RUN: grep sbbl %t | count 7
diff --git a/test/CodeGen/X86/i2k.ll b/test/CodeGen/X86/i2k.ll
index 712302d..6116c2e 100644
--- a/test/CodeGen/X86/i2k.ll
+++ b/test/CodeGen/X86/i2k.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind {
%a = load i2011* %x
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
index 0d2b29c..847e209 100644
--- a/test/CodeGen/X86/i64-mem-copy.ll
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
+; RUN: llc < %s -march=x86-64 | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
; Uses movsd to load / store i64 values if sse2 is available.
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index ca751ae..6a79ee8 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -stats |& \
+; RUN: llc < %s -march=x86-64 -stats |& \
; RUN: grep {6 .*Number of machine instrs printed}
;; Integer absolute value, should produce something at least as good as:
diff --git a/test/CodeGen/X86/illegal-asm.ll b/test/CodeGen/X86/illegal-asm.ll
index 03cc507..43128dc 100644
--- a/test/CodeGen/X86/illegal-asm.ll
+++ b/test/CodeGen/X86/illegal-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim
-; RUN: llvm-as < %s | llc -mtriple=i386-linux -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-linux -disable-fp-elim
; XFAIL: *
; Expected to run out of registers during allocation.
; PR3864
diff --git a/test/CodeGen/X86/illegal-insert.ll b/test/CodeGen/X86/illegal-insert.ll
index 59773b2..dbf1b14 100644
--- a/test/CodeGen/X86/illegal-insert.ll
+++ b/test/CodeGen/X86/illegal-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
define <4 x double> @foo0(<4 x double> %t) {
%r = insertelement <4 x double> %t, double 2.3, i32 0
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index 5ed6ddb..cecf77a 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {mulpd %xmm3, %xmm1}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {mulpd %xmm2, %xmm0}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps %xmm3, %xmm1}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0}
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
%y = fmul <4 x double> %x, %z
diff --git a/test/CodeGen/X86/imp-def-copies.ll b/test/CodeGen/X86/imp-def-copies.ll
index 3d2f656..9117840 100644
--- a/test/CodeGen/X86/imp-def-copies.ll
+++ b/test/CodeGen/X86/imp-def-copies.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
%struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* }
%struct.gs_fixed_point = type { i32, i32 }
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll
index 0a2df1c..1cb54b3 100644
--- a/test/CodeGen/X86/imul-lea-2.ll
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 | grep shl | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep imul
+; RUN: llc < %s -march=x86-64 | grep lea | count 3
+; RUN: llc < %s -march=x86-64 | grep shl | count 1
+; RUN: llc < %s -march=x86-64 | not grep imul
define i64 @t1(i64 %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll
index 6403a26..4e8e2af 100644
--- a/test/CodeGen/X86/imul-lea.ll
+++ b/test/CodeGen/X86/imul-lea.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | grep lea
declare i32 @foo()
diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll
index 6196294..4a2c7fc 100644
--- a/test/CodeGen/X86/inline-asm-2addr.ll
+++ b/test/CodeGen/X86/inline-asm-2addr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movq
+; RUN: llc < %s -march=x86-64 | not grep movq
define i64 @t(i64 %a, i64 %b) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/inline-asm-R-constraint.ll b/test/CodeGen/X86/inline-asm-R-constraint.ll
new file mode 100644
index 0000000..66c27ac
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-R-constraint.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+; 7282062
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp {
+entry:
+; CHECK: udiv8:
+; CHECK-NOT: movb %ah, (%r8)
+ %a_addr = alloca i16, align 2 ; <i16*> [#uses=2]
+ %b_addr = alloca i8, align 1 ; <i8*> [#uses=2]
+ store i16 %a, i16* %a_addr
+ store i8 %b, i8* %b_addr
+ call void asm "\09\09movw\09$2, %ax\09\09\0A\09\09divb\09$3\09\09\09\0A\09\09movb\09%al, $0\09\0A\09\09movb %ah, ($4)", "=*m,=*m,*m,*m,R,~{dirflag},~{fpsr},~{flags},~{ax}"(i8* %quotient, i8* %remainder, i16* %a_addr, i8* %b_addr, i8* %remainder) nounwind
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll
index 3c536b7..51ea843 100644
--- a/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext test 1 | grep j
+; RUN: llc -march=x86-64 < %s | FileCheck %s
; PR3701
define i64 @t(i64* %arg) nounwind {
@@ -7,6 +7,8 @@ define i64 @t(i64* %arg) nounwind {
; <label>:1 ; preds = %0
%2 = icmp eq i64* null, %arg ; <i1> [#uses=1]
%3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%struct.thread*> [#uses=0]
+; CHECK: test
+; CHECK-NEXT: j
br i1 %2, label %4, label %5
; <label>:4 ; preds = %1
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 31d94d8..09b0929 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define x86_fp80 @test1() {
%tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"()
diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll
index 9685618..ffa6ee6 100644
--- a/test/CodeGen/X86/inline-asm-fpstack2.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {fld %%st(0)} %t
; PR4185
diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll
index ac89a1d..17945fe 100644
--- a/test/CodeGen/X86/inline-asm-fpstack3.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {fld %%st(0)} %t
; PR4459
diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll
index c9122fa..bae2970 100644
--- a/test/CodeGen/X86/inline-asm-fpstack4.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR4484
declare x86_fp80 @ceil()
diff --git a/test/CodeGen/X86/inline-asm-fpstack5.ll b/test/CodeGen/X86/inline-asm-fpstack5.ll
index 64f3788..8b219cf 100644
--- a/test/CodeGen/X86/inline-asm-fpstack5.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR4485
define void @test(x86_fp80* %a) {
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
index 97eac38..5e76b6c 100644
--- a/test/CodeGen/X86/inline-asm-modifier-n.ll
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep { 37}
+; RUN: llc < %s -march=x86 | grep { 37}
; rdar://7008959
define void @bork() nounwind {
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index ca39c12..78d7e77 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -1,8 +1,8 @@
; PR2094
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movslq
-; RUN: llvm-as < %s | llc -march=x86-64 | grep addps
-; RUN: llvm-as < %s | llc -march=x86-64 | grep paddd
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movq
+; RUN: llc < %s -march=x86-64 | grep movslq
+; RUN: llc < %s -march=x86-64 | grep addps
+; RUN: llc < %s -march=x86-64 | grep paddd
+; RUN: llc < %s -march=x86-64 | not grep movq
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll
index 01f1397..46966f5 100644
--- a/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu
; PR3391
@pci_indirect = external global { } ; <{ }*> [#uses=1]
diff --git a/test/CodeGen/X86/inline-asm-pic.ll b/test/CodeGen/X86/inline-asm-pic.ll
index 04ad48d..0b5ff08 100644
--- a/test/CodeGen/X86/inline-asm-pic.ll
+++ b/test/CodeGen/X86/inline-asm-pic.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic | grep call
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep call
@main_q = internal global i8* null ; <i8**> [#uses=1]
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
new file mode 100644
index 0000000..ab44206
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; rdar://7066579
+
+ type { i64, i64, i64, i64, i64 } ; type %0
+
+define void @t() nounwind {
+entry:
+ %asmtmp = call %0 asm sideeffect "mov %cr0, $0 \0Amov %cr2, $1 \0Amov %cr3, $2 \0Amov %cr4, $3 \0Amov %cr8, $0 \0A", "=q,=q,=q,=q,=q,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%0> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 6df2c48..1f4a13f 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -O0 | grep {movl %edx, 4(%esp)} | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 | grep {movl %edx, 12(%esp)} | count 2
; rdar://6992609
target triple = "i386-apple-darwin9.0"
diff --git a/test/CodeGen/X86/inline-asm-x-scalar.ll b/test/CodeGen/X86/inline-asm-x-scalar.ll
index aafbbd1..5a9628b 100644
--- a/test/CodeGen/X86/inline-asm-x-scalar.ll
+++ b/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
define void @test1() {
tail call void asm sideeffect "ucomiss $0", "x"( float 0x41E0000000000000)
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index 02988fc..c66d7a8 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define i32 @test1() nounwind {
; Dest is AX, dest type = i32.
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index 863cda9..2243f93 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep mov | count 3
define fastcc i32 @sqlite3ExprResolveNames() nounwind {
entry:
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
index 5c0b0d3..f2c9cc7 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movw
+; RUN: llc < %s -march=x86-64 | not grep movw
define i16 @test5(i16 %f12) nounwind {
%f11 = shl i16 %f12, 2 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
index ee3ac66..e443085 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 11
+; RUN: llc < %s -march=x86-64 | grep mov | count 11
%struct.COMPOSITE = type { i8, i16, i16 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@@ -17,7 +17,7 @@
%struct.metrics = type { i16, i16, i16, i16, i16 }
%struct.rec = type { %struct.head_type }
-define void @FontChange(i1 %foo) {
+define void @FontChange(i1 %foo) nounwind {
entry:
br i1 %foo, label %bb298, label %bb49
bb49: ; preds = %entry
diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll
index 0eef517..34a29ca 100644
--- a/test/CodeGen/X86/insertelement-copytoregs.ll
+++ b/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v IMPLICIT_DEF
+; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF
define void @foo(<2 x float>* %p) {
%t = insertelement <2 x float> undef, float 0.0, i32 0
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
index 95e17b4..18aade2 100644
--- a/test/CodeGen/X86/insertelement-legalize.ll
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86 -disable-mmx
; Test to check that we properly legalize an insert vector element
define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
diff --git a/test/CodeGen/X86/invalid-shift-immediate.ll b/test/CodeGen/X86/invalid-shift-immediate.ll
index 5c47f5e..77a9f7e 100644
--- a/test/CodeGen/X86/invalid-shift-immediate.ll
+++ b/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2098
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index 4e68b77..0f94b23 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 | \
; RUN: grep {movl \$4, (.*,.*,4)}
define i32 @test(i32* %X, i32 %B) {
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
index 9d9c747..5ed0e00 100644
--- a/test/CodeGen/X86/isel-sink2.ll
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep {movb.7(%...)} %t
; RUN: not grep leal %t
diff --git a/test/CodeGen/X86/isel-sink3.ll b/test/CodeGen/X86/isel-sink3.ll
index 4e678c4..8d3d97a 100644
--- a/test/CodeGen/X86/isel-sink3.ll
+++ b/test/CodeGen/X86/isel-sink3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {addl.\$4, %ecx}
-; RUN: llvm-as < %s | llc | not grep leal
+; RUN: llc < %s | grep {addl.\$4, %ecx}
+; RUN: llc < %s | not grep leal
; this should not sink %1 into bb1, that would increase reg pressure.
; rdar://6399178
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 7acc5cc..507a328 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: not grep cmp %t
; RUN: not grep xor %t
; RUN: grep jne %t | count 1
diff --git a/test/CodeGen/X86/isnan.ll b/test/CodeGen/X86/isnan.ll
index 65916ff..4d465c0 100644
--- a/test/CodeGen/X86/isnan.ll
+++ b/test/CodeGen/X86/isnan.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep call
+; RUN: llc < %s -march=x86 | not grep call
declare i1 @llvm.isunordered.f64(double)
diff --git a/test/CodeGen/X86/isnan2.ll b/test/CodeGen/X86/isnan2.ll
index 18fe29a..7753346 100644
--- a/test/CodeGen/X86/isnan2.ll
+++ b/test/CodeGen/X86/isnan2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep pxor
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep pxor
; This should not need to materialize 0.0 to evaluate the condition.
diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll
index 3799b9c..8adf723 100644
--- a/test/CodeGen/X86/ispositive.ll
+++ b/test/CodeGen/X86/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {shrl.*31}
+; RUN: llc < %s -march=x86 | grep {shrl.*31}
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index a48f061..c695c29 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
+; RUN: llc < %s -march=x86-64 -o %t
; RUN: grep inc %t | count 1
; RUN: grep dec %t | count 2
; RUN: grep addq %t | count 13
; RUN: not grep addb %t
-; RUN: grep leaq %t | count 8
-; RUN: grep leal %t | count 4
+; RUN: grep leaq %t | count 9
+; RUN: grep leal %t | count 3
; RUN: grep movq %t | count 5
; IV users in each of the loops from other loops shouldn't cause LSR
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index cb7d627..5e8e162 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep jns
+; RUN: llc < %s -march=x86 | grep jns
define i32 @f(i32 %X) {
entry:
diff --git a/test/CodeGen/X86/ldzero.ll b/test/CodeGen/X86/ldzero.ll
index 2db78a2..dab04bc 100644
--- a/test/CodeGen/X86/ldzero.ll
+++ b/test/CodeGen/X86/ldzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; verify PR 1700 is still fixed
; ModuleID = 'hh.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index a33b71c..6930350 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {lea EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]}
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep add
define i32 @test1(i32 %A, i32 %B) {
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
index 39122bb..44413d6 100644
--- a/test/CodeGen/X86/lea-3.ll
+++ b/test/CodeGen/X86/lea-3.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal (%rdi,%rdi,2), %eax}
+; RUN: llc < %s -march=x86-64 | grep {leal (%rdi,%rdi,2), %eax}
define i32 @test(i32 %a) {
%tmp2 = mul i32 %a, 3 ; <i32> [#uses=1]
ret i32 %tmp2
}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leaq (,%rdi,4), %rax}
+; RUN: llc < %s -march=x86-64 | grep {leaq (,%rdi,4), %rax}
define i64 @test2(i64 %a) {
%tmp2 = shl i64 %a, 2
%tmp3 = or i64 %tmp2, %a
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
index 8f0835f..2171204c 100644
--- a/test/CodeGen/X86/lea-4.ll
+++ b/test/CodeGen/X86/lea-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 2
+; RUN: llc < %s -march=x86-64 | grep lea | count 2
define zeroext i16 @t1(i32 %on_off) nounwind {
entry:
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
index 390e35a..3f32fd2 100644
--- a/test/CodeGen/X86/lea-recursion.ll
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 12
+; RUN: llc < %s -march=x86-64 | grep lea | count 12
; This testcase was written to demonstrate an instruction-selection problem,
; however it also happens to expose a limitation in the DAGCombiner's
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 30a477a..22a9644 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -1,9 +1,34 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86 | not grep orl
+; RUN: llc < %s -march=x86-64 | FileCheck %s
-define i32 @test(i32 %x) {
- %tmp1 = shl i32 %x, 3 ; <i32> [#uses=1]
- %tmp2 = add i32 %tmp1, 7 ; <i32> [#uses=1]
+define i32 @test1(i32 %x) nounwind {
+ %tmp1 = shl i32 %x, 3
+ %tmp2 = add i32 %tmp1, 7
ret i32 %tmp2
+; CHECK: test1:
+; CHECK: leal 7(,%rdi,8), %eax
}
+
+; ISel the add of -4 with a neg and use an lea for the rest of the
+; arithmetic.
+define i32 @test2(i32 %x_offs) nounwind readnone {
+entry:
+ %t0 = icmp sgt i32 %x_offs, 4
+ br i1 %t0, label %bb.nph, label %bb2
+
+bb.nph:
+ %tmp = add i32 %x_offs, -5
+ %tmp6 = lshr i32 %tmp, 2
+ %tmp7 = mul i32 %tmp6, -4
+ %tmp8 = add i32 %tmp7, %x_offs
+ %tmp9 = add i32 %tmp8, -4
+ ret i32 %tmp9
+
+bb2:
+ ret i32 %x_offs
+; CHECK: test2:
+; CHECK: leal -5(%rdi), %eax
+; CHECK: andl $-4, %eax
+; CHECK: negl %eax
+; CHECK: leal -4(%rdi,%rax), %eax
+}
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 97654b2..574b46a 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 -disable-mmx -o %t -f
-; RUN: grep divdi3 %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
+; RUN: grep {call.*divdi3} %t | count 2
; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +12,4 @@
define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
%div.r = sdiv <2 x i64> %num, %div
ret <2 x i64> %div.r
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/X86/lfence.ll b/test/CodeGen/X86/lfence.ll
index 0721d73..7a96ca3 100644
--- a/test/CodeGen/X86/lfence.ll
+++ b/test/CodeGen/X86/lfence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep lfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/limited-prec.ll b/test/CodeGen/X86/limited-prec.ll
index 6afaea4..7bf4ac2 100644
--- a/test/CodeGen/X86/limited-prec.ll
+++ b/test/CodeGen/X86/limited-prec.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -limit-float-precision=6 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=6 -march=x86 | \
; RUN: not grep exp | not grep log | not grep pow
-; RUN: llvm-as < %s | llc -limit-float-precision=12 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=12 -march=x86 | \
; RUN: not grep exp | not grep log | not grep pow
-; RUN: llvm-as < %s | llc -limit-float-precision=18 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=18 -march=x86 | \
; RUN: not grep exp | not grep log | not grep pow
define float @f1(float %x) nounwind noinline {
diff --git a/test/CodeGen/X86/live-out-reg-info.ll b/test/CodeGen/X86/live-out-reg-info.ll
index b6fb7df..7132777 100644
--- a/test/CodeGen/X86/live-out-reg-info.ll
+++ b/test/CodeGen/X86/live-out-reg-info.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep testl
+; RUN: llc < %s -march=x86-64 | grep {testb \[$\]1,}
; Make sure dagcombine doesn't eliminate the comparison due
; to an off-by-one bug with ComputeMaskedBits information.
diff --git a/test/CodeGen/X86/local-liveness.ll b/test/CodeGen/X86/local-liveness.ll
index 18d999b..321f208 100644
--- a/test/CodeGen/X86/local-liveness.ll
+++ b/test/CodeGen/X86/local-liveness.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep {subl %eax, %edx}
+; RUN: llc < %s -march=x86 -regalloc=local | grep {subl %eax, %edx}
; Local regalloc shouldn't assume that both the uses of the
; sub instruction are kills, because one of them is tied
diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll
index 8d9ebfb..e0165fb 100644
--- a/test/CodeGen/X86/long-setcc.ll
+++ b/test/CodeGen/X86/long-setcc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep shr | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep shr | count 1
+; RUN: llc < %s -march=x86 | grep xor | count 1
define i1 @t1(i64 %x) nounwind {
%B = icmp slt i64 %x, 0
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
index a8e2c31..9a4c8f2 100644
--- a/test/CodeGen/X86/longlong-deadload.ll
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep '4{(%...)}
+; RUN: llc < %s -march=x86 | not grep '4{(%...)}
; This should not load or store the top part of *P.
define void @test(i64* %P) nounwind {
diff --git a/test/CodeGen/X86/loop-hoist.ll b/test/CodeGen/X86/loop-hoist.ll
index 73284a4..b52066d 100644
--- a/test/CodeGen/X86/loop-hoist.ll
+++ b/test/CodeGen/X86/loop-hoist.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 |\
-; RUN: grep L_Arr.non_lazy_ptr
-; RUN: llvm-as < %s | \
-; RUN: llc -disable-post-RA-scheduler=true \
-; RUN: -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 |\
-; RUN: %prcontext L_Arr.non_lazy_ptr 1 | grep {4(%esp)}
+; LSR should hoist the load from the "Arr" stub out of the loop.
+
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK: L_Arr$non_lazy_ptr
+; CHECK: LBB1_1: ## %cond_true
@Arr = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index 8ea5bdb..30b5114 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic | \
+; RUN: llc < %s -march=x86 -relocation-model=pic | \
; RUN: grep {, 4} | count 1
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
;
; Make sure the common loop invariant A is hoisted up to preheader,
; since too many registers are needed to subsume it into the addressing modes.
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
index b6bb814..70c9134 100644
--- a/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
; RUN: grep {A+} | count 2
;
; Make sure the common loop invariant A is not hoisted up to preheader,
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
index 8737101..4cb56ca 100644
--- a/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
; RUN: grep {A+} | count 2
;
; Make sure the common loop invariant A is not hoisted up to preheader,
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index 507a9e5..a1f38a7 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
;
; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader.
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
index 4e95bdd..e340edd 100644
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | grep 240
-; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+; RUN: llc < %s -march=x86 | grep cmp | grep 240
+; RUN: llc < %s -march=x86 | grep inc | count 1
define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) {
entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 711f223..87b606f 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | grep 64
-; RUN: llvm-as < %s | llc -march=x86 | not grep inc
+; RUN: llc < %s -march=x86 | grep cmp | grep 64
+; RUN: llc < %s -march=x86 | not grep inc
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index 6e037e2a..4ec2a02 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+; RUN: llc < %s -march=x86 | grep inc | count 1
@X = weak global i16 0 ; <i16*> [#uses=1]
@Y = weak global i16 0 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll
index fa8b57a..81da82e 100644
--- a/test/CodeGen/X86/loop-strength-reduce6.ll
+++ b/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep inc
+; RUN: llc < %s -march=x86-64 | not grep inc
define fastcc i32 @decodeMP3(i32 %isize, i32* %done) {
entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce7.ll b/test/CodeGen/X86/loop-strength-reduce7.ll
index b6a130a..4b565a6 100644
--- a/test/CodeGen/X86/loop-strength-reduce7.ll
+++ b/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep imul
+; RUN: llc < %s -march=x86 | not grep imul
target triple = "i386-apple-darwin9.6"
%struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index 1846c7d..e14cd8a 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep leal | not grep 16
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index c998268..474450a 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,4 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK: decq
+; CHECK-NEXT: jne
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/lsr-negative-stride.ll b/test/CodeGen/X86/lsr-negative-stride.ll
index 28d041f..b08356c 100644
--- a/test/CodeGen/X86/lsr-negative-stride.ll
+++ b/test/CodeGen/X86/lsr-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: not grep neg %t
; RUN: not grep sub.*esp %t
; RUN: not grep esi %t
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 00e1d69..4058989 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep inc %t | count 1
; RUN: not grep incw %t
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0bf347c..bc493bd 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movz %t
; RUN: not grep sar %t
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
index 639a7a6..f23c020 100644
--- a/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep and %t | count 6
; RUN: grep movzb %t | count 6
; RUN: grep sar %t | count 12
diff --git a/test/CodeGen/X86/maskmovdqu.ll b/test/CodeGen/X86/maskmovdqu.ll
index 4d1ed1d..7796f0e 100644
--- a/test/CodeGen/X86/maskmovdqu.ll
+++ b/test/CodeGen/X86/maskmovdqu.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep -i EDI
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep -i RDI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -i EDI
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI
; rdar://6573467
define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 0fccc35..2dc939e 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
%struct.ParmT = type { [25 x i8], i8, i8* }
@.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 97a2dd5..24530cd 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep call.*memcpy | count 2
+; RUN: llc < %s -march=x86-64 | grep call.*memcpy | count 2
declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
index a2b452d..d405068 100644
--- a/test/CodeGen/X86/memmove-0.ll
+++ b/test/CodeGen/X86/memmove-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
index 3b2debc..2057be8 100644
--- a/test/CodeGen/X86/memmove-1.ll
+++ b/test/CodeGen/X86/memmove-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
index 37bbe0b..68a9f4d 100644
--- a/test/CodeGen/X86/memmove-2.ll
+++ b/test/CodeGen/X86/memmove-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
index 2e692c7..d8a419c 100644
--- a/test/CodeGen/X86/memmove-3.ll
+++ b/test/CodeGen/X86/memmove-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
index f23c7d5..027db1f 100644
--- a/test/CodeGen/X86/memmove-4.ll
+++ b/test/CodeGen/X86/memmove-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep call
+; RUN: llc < %s | not grep call
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 2ad665c..7deb52f 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep rep
-; RUN: llvm-as < %s | llc -march=x86 | grep memset
+; RUN: llc < %s | not grep rep
+; RUN: llc < %s | grep memset
+
+target triple = "i386"
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index 564174c..cf7464d0 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
%struct.x = type { i16, i16 }
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index d76d4d4..da8fc51 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10
define void @bork() nounwind {
entry:
diff --git a/test/CodeGen/X86/mfence.ll b/test/CodeGen/X86/mfence.ll
index 6abdbce..a1b2283 100644
--- a/test/CodeGen/X86/mfence.ll
+++ b/test/CodeGen/X86/mfence.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep sfence
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lfence
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 1df0e3a..7dcd84d 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -1,14 +1,12 @@
-; RUN: llvm-as < %s | llc -o %t -f
-; RUN: grep __alloca %t | count 2
-; RUN: grep 4294967288 %t
-; RUN: grep {pushl %eax} %t
-; RUN: grep 8028 %t | count 2
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
-define void @foo1(i32 %N) {
+define void @foo1(i32 %N) nounwind {
entry:
+; CHECK: _foo1:
+; CHECK: call __alloca
%tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1]
call void @bar1( i32* %tmp14 )
ret void
@@ -16,8 +14,13 @@ entry:
declare void @bar1(i32*)
-define void @foo2(i32 inreg %N) {
+define void @foo2(i32 inreg %N) nounwind {
entry:
+; CHECK: _foo2:
+; CHECK: andl $-16, %esp
+; CHECK: pushl %eax
+; CHECK: call __alloca
+; CHECK: movl 8028(%esp), %eax
%A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1]
%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
call void @bar2( i32* %A2.sub, i32 %N )
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 9496cbb..426e98e 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
;
; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
; On Darwin x86-32, v1i64 values are passed in memory.
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index aac614a..c42af08 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
@g_v8qi = external global <8 x i8>
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index 501786e..e4dfdbf 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
;; A basic sanity check to make sure that MMX arithmetic actually compiles.
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index c6bb489..1fd8f67 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 4
+; RUN: llc < %s -march=x86-64 | grep movd | count 4
define i64 @foo(<1 x i64>* %p) {
%t = load <1 x i64>* %p
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 2047ce7..3607043 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
+; RUN: llc < %s -march=x86-64 | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs,
; increases the places that need to use emms.
diff --git a/test/CodeGen/X86/mmx-emms.ll b/test/CodeGen/X86/mmx-emms.ll
index 60ba84d..5ff2588 100644
--- a/test/CodeGen/X86/mmx-emms.ll
+++ b/test/CodeGen/X86/mmx-emms.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep emms
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms
define void @foo() {
entry:
call void @llvm.x86.mmx.emms( )
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index 0aa476d..a063ee1 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
define <2 x i32> @qux(i32 %A) nounwind {
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index f1d04fa..3af09f4 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
; PR2562
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 126fc9d..0af7e01 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
define void @bork(<1 x i64>* %x) {
entry:
diff --git a/test/CodeGen/X86/mmx-s2v.ll b/test/CodeGen/X86/mmx-s2v.ll
index 4ec2403..c98023c 100644
--- a/test/CodeGen/X86/mmx-s2v.ll
+++ b/test/CodeGen/X86/mmx-s2v.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
; PR2574
define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index 277cf07..dd0aa2c 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psrlw
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psrad
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psrlw
define i64 @t1(<1 x i64> %mm1) nounwind {
entry:
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index 4b91cb9..e3125c7 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
; PR1427
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 4dd1e47..8253c20 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
%struct.vS1024 = type { [8 x <4 x i32>] }
%struct.vS512 = type { [4 x <4 x i32>] }
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index 95f9579..d21e240 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/movfs.ll b/test/CodeGen/X86/movfs.ll
index af102d4..823e986 100644
--- a/test/CodeGen/X86/movfs.ll
+++ b/test/CodeGen/X86/movfs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fs
+; RUN: llc < %s -march=x86 | grep fs
define i32 @foo() nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index f621849..b04048b 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep gs
+; RUN: llc < %s -march=x86 | grep gs
define i32 @foo() nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
index 487614f..eca9e6f 100644
--- a/test/CodeGen/X86/mul-legalize.ll
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 24576
+; RUN: llc < %s -march=x86 | grep 24576
; PR2135
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/mul-remat.ll b/test/CodeGen/X86/mul-remat.ll
index ffc8cc0..3fa0050 100644
--- a/test/CodeGen/X86/mul-remat.ll
+++ b/test/CodeGen/X86/mul-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
; PR1874
define i32 @test(i32 %a, i32 %b) {
diff --git a/test/CodeGen/X86/mul-shift-reassoc.ll b/test/CodeGen/X86/mul-shift-reassoc.ll
index f0ecb5b..3777d8b 100644
--- a/test/CodeGen/X86/mul-shift-reassoc.ll
+++ b/test/CodeGen/X86/mul-shift-reassoc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
-; RUN: llvm-as < %s | llc -march=x86 | not grep add
+; RUN: llc < %s -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | not grep add
define i32 @test(i32 %X, i32 %Y) {
; Push the shl through the mul to allow an LEA to be formed, instead
diff --git a/test/CodeGen/X86/mul128.ll b/test/CodeGen/X86/mul128.ll
index c0ce6b3..6825b99 100644
--- a/test/CodeGen/X86/mul128.ll
+++ b/test/CodeGen/X86/mul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mul | count 3
+; RUN: llc < %s -march=x86-64 | grep mul | count 3
define i128 @foo(i128 %t, i128 %u) {
%k = mul i128 %t, %u
diff --git a/test/CodeGen/X86/mul64.ll b/test/CodeGen/X86/mul64.ll
index cd0f802..5a25c5d 100644
--- a/test/CodeGen/X86/mul64.ll
+++ b/test/CodeGen/X86/mul64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 3
+; RUN: llc < %s -march=x86 | grep mul | count 3
define i64 @foo(i64 %t, i64 %u) {
%k = mul i64 %t, %u
diff --git a/test/CodeGen/X86/multiple-return-values-cross-block.ll b/test/CodeGen/X86/multiple-return-values-cross-block.ll
index f632b87..e9837d0 100644
--- a/test/CodeGen/X86/multiple-return-values-cross-block.ll
+++ b/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
declare {x86_fp80, x86_fp80} @test()
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
index 5f7a83f..018d997 100644
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ b/test/CodeGen/X86/multiple-return-values.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
define {i64, float} @bar(i64 %a, float %b) {
%y = add i64 %a, 7
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 96cac0d..0b56644 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc > %t
+; RUN: opt < %s -std-compile-opts | llc > %t
; RUN: grep 2147027116 %t | count 3
; RUN: grep 2147228864 %t | count 3
; RUN: grep 2146502828 %t | count 3
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
index 0ee11b4..18f1108 100644
--- a/test/CodeGen/X86/narrow_op-1.ll
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | grep 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | grep 16842752
+; RUN: llc < %s -march=x86-64 | grep orb | count 1
+; RUN: llc < %s -march=x86-64 | grep orb | grep 1
+; RUN: llc < %s -march=x86-64 | grep orl | count 1
+; RUN: llc < %s -march=x86-64 | grep orl | grep 16842752
%struct.bf = type { i64, i16, i16, i32 }
@bfi = common global %struct.bf zeroinitializer, align 16
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
index b441794..796ef7a 100644
--- a/test/CodeGen/X86/narrow_op-2.ll
+++ b/test/CodeGen/X86/narrow_op-2.ll
@@ -1,12 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 254
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 253
+; RUN: llc < %s -march=x86-64 | FileCheck %s
%struct.bf = type { i64, i16, i16, i32 }
@bfi = external global %struct.bf*
define void @t1() nounwind ssp {
entry:
+
+; CHECK: andb $-2, 10(
+; CHECK: andb $-3, 10(
+
%0 = load %struct.bf** @bfi, align 8
%1 = getelementptr %struct.bf* %0, i64 0, i32 1
%2 = bitcast i16* %1 to i32*
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
index 1a7ee08..57164f2 100644
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; RUN: grep xorps %t | count 1
; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization
@@ -9,4 +9,4 @@ entry:
%sub = fsub float %a, %b ; <float> [#uses=1]
%neg = fsub float -0.000000e+00, %sub ; <float> [#uses=1]
ret float %neg
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index 689639f..c3f412e 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86 | not grep xor
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | not grep xor
; PR3374
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 8cc1bec..7842eb8 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86-64 | \
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
; RUN: not egrep {addsd|subsd|xor}
declare double @sin(double %f)
@@ -6,7 +6,7 @@ declare double @sin(double %f)
define double @foo(double %e)
{
%f = fsub double 0.0, %e
- %g = call double @sin(double %f)
+ %g = call double @sin(double %f) readonly
%h = fsub double 0.0, %g
ret double %h
}
diff --git a/test/CodeGen/X86/negative-subscript.ll b/test/CodeGen/X86/negative-subscript.ll
index f2bd315..28f7d6b 100644
--- a/test/CodeGen/X86/negative-subscript.ll
+++ b/test/CodeGen/X86/negative-subscript.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; rdar://6559995
@a = external global [255 x i8*], align 32
diff --git a/test/CodeGen/X86/negative_zero.ll b/test/CodeGen/X86/negative_zero.ll
index 3c47b8f..29474c2 100644
--- a/test/CodeGen/X86/negative_zero.ll
+++ b/test/CodeGen/X86/negative_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3 | grep fchs
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs
define double @T() {
diff --git a/test/CodeGen/X86/nobt.ll b/test/CodeGen/X86/nobt.ll
index 5529428..35090e37 100644
--- a/test/CodeGen/X86/nobt.ll
+++ b/test/CodeGen/X86/nobt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep btl
+; RUN: llc < %s -march=x86 | not grep btl
; This tests some cases where BT must not be generated. See also bt.ll.
; Fixes 20040709-[12].c in gcc testsuite.
diff --git a/test/CodeGen/X86/nofence.ll b/test/CodeGen/X86/nofence.ll
index 132ac94..244d2e9 100644
--- a/test/CodeGen/X86/nofence.ll
+++ b/test/CodeGen/X86/nofence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep fence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep fence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/omit-label.ll b/test/CodeGen/X86/omit-label.ll
index 457b66b..0ec03eb 100644
--- a/test/CodeGen/X86/omit-label.ll
+++ b/test/CodeGen/X86/omit-label.ll
@@ -1,7 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep BB1_1:
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-linux-gnu | FileCheck %s
; PR4126
+; PR4732
-; Don't omit this label's definition.
+; Don't omit these labels' definitions.
+
+; CHECK: bux:
+; CHECK: LBB1_1:
define void @bux(i32 %p_53) nounwind optsize {
entry:
@@ -21,3 +25,33 @@ bb3: ; preds = %bb.i, %entry
}
declare i32 @baz(...)
+
+; Don't omit this label in the assembly output.
+; CHECK: int321:
+; CHECK: LBB2_1
+; CHECK: LBB2_1
+; CHECK: LBB2_1:
+
+define void @int321(i8 signext %p_103, i32 %uint8p_104) nounwind readnone {
+entry:
+ %tobool = icmp eq i8 %p_103, 0 ; <i1> [#uses=1]
+ %cmp.i = icmp sgt i8 %p_103, 0 ; <i1> [#uses=1]
+ %or.cond = and i1 %tobool, %cmp.i ; <i1> [#uses=1]
+ br i1 %or.cond, label %land.end.i, label %for.cond.preheader
+
+land.end.i: ; preds = %entry
+ %conv3.i = sext i8 %p_103 to i32 ; <i32> [#uses=1]
+ %div.i = sdiv i32 1, %conv3.i ; <i32> [#uses=1]
+ %tobool.i = icmp eq i32 %div.i, -2147483647 ; <i1> [#uses=0]
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %land.end.i, %entry
+ %cmp = icmp sgt i8 %p_103, 1 ; <i1> [#uses=1]
+ br i1 %cmp, label %for.end.split, label %for.cond
+
+for.cond: ; preds = %for.cond.preheader, %for.cond
+ br label %for.cond
+
+for.end.split: ; preds = %for.cond.preheader
+ ret void
+}
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
index 322850c..fa2aef5 100644
--- a/test/CodeGen/X86/opt-ext-uses.ll
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movw | count 1
+; RUN: llc < %s -march=x86 | grep movw | count 1
define i16 @t() signext {
entry:
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 90c1456..162c7a5 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep cmov
+; RUN: llc < %s -march=x86 | not grep cmov
; LSR should be able to eliminate the max computations by
; making the loops use slt/ult comparisons instead of ne comparisons.
diff --git a/test/CodeGen/X86/optimize-max-1.ll b/test/CodeGen/X86/optimize-max-1.ll
index 084e181..ad6c24d 100644
--- a/test/CodeGen/X86/optimize-max-1.ll
+++ b/test/CodeGen/X86/optimize-max-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep cmov
+; RUN: llc < %s -march=x86-64 | not grep cmov
; LSR should be able to eliminate both smax and umax expressions
; in loop trip counts.
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
index effc3fc..8851c5b1a 100644
--- a/test/CodeGen/X86/optimize-max-2.ll
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep cmov %t | count 2
; RUN: grep jne %t | count 1
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index 20886d5..9ebf890 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep set
+; RUN: llc < %s -march=x86 | not grep set
define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
entry:
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index 7584a70..c1fc041 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -6,7 +6,7 @@
; Check that the shift gets turned into an LEA.
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep {mov E.X, E.X}
@G = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/packed_struct.ll b/test/CodeGen/X86/packed_struct.ll
index 2a781e7..da6e8f8 100644
--- a/test/CodeGen/X86/packed_struct.ll
+++ b/test/CodeGen/X86/packed_struct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep foos+5 %t
; RUN: grep foos+1 %t
; RUN: grep foos+9 %t
@@ -15,7 +15,7 @@ target triple = "i686-pc-linux-gnu"
@foos = external global %struct.anon ; <%struct.anon*> [#uses=3]
@bara = weak global [4 x <{ i32, i8 }>] zeroinitializer ; <[4 x <{ i32, i8 }>]*> [#uses=2]
-define i32 @foo() {
+define i32 @foo() nounwind {
entry:
%tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2) ; <i32> [#uses=1]
@@ -25,7 +25,7 @@ entry:
ret i32 %tmp7
}
-define i8 @bar() {
+define i8 @bar() nounwind {
entry:
%tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1) ; <i8> [#uses=1]
%tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1) ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
index 8dcd23a..e521d8e 100644
--- a/test/CodeGen/X86/peep-test-0.ll
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep cmp %t
; RUN: not grep test %t
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
index 85e3bf2..f83f0f6 100644
--- a/test/CodeGen/X86/peep-test-1.ll
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep dec %t | count 1
; RUN: not grep test %t
; RUN: not grep cmp %t
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
index 788f610..2745172 100644
--- a/test/CodeGen/X86/peep-test-2.ll
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep testl
+; RUN: llc < %s -march=x86 | grep testl
; It's tempting to eliminate the testl instruction here and just use the
; EFLAGS value from the incl, however it can't be known whether the add
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
new file mode 100644
index 0000000..13a69ed
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; rdar://7226797
+
+; LLVM should omit the testl and use the flags result from the orl.
+
+; CHECK: or:
+define void @or(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: je
+ %3 = or i32 %2, %1 ; <i32> [#uses=1]
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* %A, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+; CHECK: xor:
+define void @xor(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+; CHECK: xorl $1, %e
+; CHECK-NEXT: je
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+ %3 = xor i32 %2, %1 ; <i32> [#uses=1]
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* %A, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+; CHECK: and:
+define void @and(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+ store i8 0, i8* %p
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+; CHECK: andl $3, %
+; CHECK-NEXT: movb %
+; CHECK-NEXT: je
+ %3 = and i32 %2, %1 ; <i32> [#uses=1]
+ %t = trunc i32 %3 to i8
+ store i8 %t, i8* %p
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* null, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+; Just like @and, but without the trunc+store. This should use a testl
+; instead of an andl.
+; CHECK: test:
+define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+ store i8 0, i8* %p
+ %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
+ %1 = and i32 %0, 3 ; <i32> [#uses=1]
+ %2 = xor i32 %IA, 1 ; <i32> [#uses=1]
+; CHECK: testb $3, %
+; CHECK-NEXT: je
+ %3 = and i32 %2, %1 ; <i32> [#uses=1]
+ %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+bb: ; preds = %entry
+ store float 0.000000e+00, float* null, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index e6c88bb..e4ab2b5 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd \$3, %xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd \$3, %xmm0, %xmm0}
define float @foo(<8 x float> %a) nounwind {
%c = extractelement <8 x float> %a, i32 3
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 77332d0..5e18044 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2
define float @foo(<4 x float> %a) {
%b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
new file mode 100644
index 0000000..5acf04c
--- /dev/null
+++ b/test/CodeGen/X86/personality.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+ invoke void @_Z1gv( )
+ to label %return unwind label %unwind
+
+unwind: ; preds = %entry
+ br i1 false, label %eh_then, label %cleanup20
+
+eh_then: ; preds = %unwind
+ invoke void @__cxa_end_catch( )
+ to label %return unwind label %unwind10
+
+unwind10: ; preds = %eh_then
+ %eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 ) ; <i32> [#uses=2]
+ %tmp18 = icmp slt i64 %eh_select13, 0 ; <i1> [#uses=1]
+ br i1 %tmp18, label %filter, label %cleanup20
+
+filter: ; preds = %unwind10
+ unreachable
+
+cleanup20: ; preds = %unwind10, %unwind
+ %eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ] ; <i32> [#uses=0]
+ ret void
+
+return: ; preds = %eh_then, %entry
+ ret void
+}
+
+declare void @_Z1gv()
+
+declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
+
+declare void @__gxx_personality_v0()
+
+declare void @__cxa_end_catch()
+
+; X64: Leh_frame_common_begin:
+; X64: .long ___gxx_personality_v0@GOTPCREL+4
+
+; X32: Leh_frame_common_begin:
+; X32: .long L___gxx_personality_v0$non_lazy_ptr-
+; ....
+
+; X32: .section __IMPORT,__pointers,non_lazy_symbol_pointers
+; X32: L___gxx_personality_v0$non_lazy_ptr:
+; X32: .indirect_symbol ___gxx_personality_v0
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 7ca3ea8..23c509c 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 5
; PR2659
define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 3bbc55d..2c855ce 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
%struct.dpoint = type { double, double }
diff --git a/test/CodeGen/X86/pic-load-remat.ll b/test/CodeGen/X86/pic-load-remat.ll
index cb4e640..7729752 100644
--- a/test/CodeGen/X86/pic-load-remat.ll
+++ b/test/CodeGen/X86/pic-load-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
define void @f() nounwind {
entry:
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
new file mode 100644
index 0000000..3a547f9
--- /dev/null
+++ b/test/CodeGen/X86/pic.ll
@@ -0,0 +1,208 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false | FileCheck %s -check-prefix=LINUX
+
+@ptr = external global i32*
+@dst = external global i32
+@src = external global i32
+
+define void @test1() nounwind {
+entry:
+ store i32* @dst, i32** @ptr
+ %tmp.s = load i32* @src
+ store i32 %tmp.s, i32* @dst
+ ret void
+
+; LINUX: test1:
+; LINUX: call .L1$pb
+; LINUX-NEXT: .L1$pb:
+; LINUX-NEXT: popl
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref1-.L1$pb),
+; LINUX: movl dst@GOT(%eax),
+; LINUX: movl ptr@GOT(%eax),
+; LINUX: movl src@GOT(%eax),
+; LINUX: ret
+}
+
+@ptr2 = global i32* null
+@dst2 = global i32 0
+@src2 = global i32 0
+
+define void @test2() nounwind {
+entry:
+ store i32* @dst2, i32** @ptr2
+ %tmp.s = load i32* @src2
+ store i32 %tmp.s, i32* @dst2
+ ret void
+
+; LINUX: test2:
+; LINUX: call .L2$pb
+; LINUX-NEXT: .L2$pb:
+; LINUX-NEXT: popl
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref2-.L2$pb), %eax
+; LINUX: movl dst2@GOT(%eax),
+; LINUX: movl ptr2@GOT(%eax),
+; LINUX: movl src2@GOT(%eax),
+; LINUX: ret
+
+}
+
+declare i8* @malloc(i32)
+
+define void @test3() nounwind {
+entry:
+ %ptr = call i8* @malloc(i32 40)
+ ret void
+; LINUX: test3:
+; LINUX: pushl %ebx
+; LINUX-NEXT: subl $8, %esp
+; LINUX-NEXT: call .L3$pb
+; LINUX-NEXT: .L3$pb:
+; LINUX-NEXT: popl %ebx
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref3-.L3$pb), %ebx
+; LINUX: movl $40, (%esp)
+; LINUX: call malloc@PLT
+; LINUX: addl $8, %esp
+; LINUX: popl %ebx
+; LINUX: ret
+}
+
+@pfoo = external global void(...)*
+
+define void @test4() nounwind {
+entry:
+ %tmp = call void(...)*(...)* @afoo()
+ store void(...)* %tmp, void(...)** @pfoo
+ %tmp1 = load void(...)** @pfoo
+ call void(...)* %tmp1()
+ ret void
+; LINUX: test4:
+; LINUX: call .L4$pb
+; LINUX-NEXT: .L4$pb:
+; LINUX: popl
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref4-.L4$pb),
+; LINUX: movl pfoo@GOT(%esi),
+; LINUX: call afoo@PLT
+; LINUX: call *
+}
+
+declare void(...)* @afoo(...)
+
+define void @test5() nounwind {
+entry:
+ call void(...)* @foo()
+ ret void
+; LINUX: test5:
+; LINUX: call .L5$pb
+; LINUX: popl %ebx
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref5-.L5$pb), %ebx
+; LINUX: call foo@PLT
+}
+
+declare void @foo(...)
+
+
+@ptr6 = internal global i32* null
+@dst6 = internal global i32 0
+@src6 = internal global i32 0
+
+define void @test6() nounwind {
+entry:
+ store i32* @dst6, i32** @ptr6
+ %tmp.s = load i32* @src6
+ store i32 %tmp.s, i32* @dst6
+ ret void
+
+; LINUX: test6:
+; LINUX: call .L6$pb
+; LINUX-NEXT: .L6$pb:
+; LINUX-NEXT: popl %eax
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref6-.L6$pb), %eax
+; LINUX: leal dst6@GOTOFF(%eax), %ecx
+; LINUX: movl %ecx, ptr6@GOTOFF(%eax)
+; LINUX: movl src6@GOTOFF(%eax), %ecx
+; LINUX: movl %ecx, dst6@GOTOFF(%eax)
+; LINUX: ret
+}
+
+
+;; Test constant pool references.
+define double @test7(i32 %a.u) nounwind {
+entry:
+ %tmp = icmp eq i32 %a.u,0
+ %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
+ ret double %retval
+
+; LINUX: .LCPI7_0:
+
+; LINUX: test7:
+; LINUX: call .L7$pb
+; LINUX: .L7$pb:
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref7-.L7$pb),
+; LINUX: fldl .LCPI7_0@GOTOFF(
+}
+
+
+;; Test jump table references.
+define void @test8(i32 %n.u) nounwind {
+entry:
+ switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+ tail call void(...)* @foo1()
+ ret void
+bb1:
+ tail call void(...)* @foo2()
+ ret void
+bb2:
+ tail call void(...)* @foo6()
+ ret void
+bb3:
+ tail call void(...)* @foo3()
+ ret void
+bb4:
+ tail call void(...)* @foo4()
+ ret void
+bb5:
+ tail call void(...)* @foo5()
+ ret void
+bb6:
+ tail call void(...)* @foo1()
+ ret void
+bb7:
+ tail call void(...)* @foo2()
+ ret void
+bb8:
+ tail call void(...)* @foo6()
+ ret void
+bb9:
+ tail call void(...)* @foo3()
+ ret void
+bb10:
+ tail call void(...)* @foo4()
+ ret void
+bb11:
+ tail call void(...)* @foo5()
+ ret void
+bb12:
+ tail call void(...)* @foo6()
+ ret void
+
+; LINUX: test8:
+; LINUX: call .L8$pb
+; LINUX: .L8$pb:
+; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref8-.L8$pb),
+; LINUX: addl .LJTI8_0@GOTOFF(
+; LINUX: jmpl *%ecx
+
+; LINUX: .LJTI8_0:
+; LINUX: .long .LBB8_2@GOTOFF
+; LINUX: .long .LBB8_2@GOTOFF
+; LINUX: .long .LBB8_7@GOTOFF
+; LINUX: .long .LBB8_3@GOTOFF
+; LINUX: .long .LBB8_7@GOTOFF
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 04245d1..b3750c1 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep 'lJTI'
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
; rdar://6971437
declare void @_Z3bari(i32)
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index e00d1e5..e2746a8 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 12
diff --git a/test/CodeGen/X86/postalloc-coalescing.ll b/test/CodeGen/X86/postalloc-coalescing.ll
index 9c44a5a..a171436 100644
--- a/test/CodeGen/X86/postalloc-coalescing.ll
+++ b/test/CodeGen/X86/postalloc-coalescing.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | grep mov | count 3
define fastcc i32 @_Z18yy_get_next_bufferv() {
entry:
diff --git a/test/CodeGen/X86/pr1462.ll b/test/CodeGen/X86/pr1462.ll
index 7f9037a..62549a5 100644
--- a/test/CodeGen/X86/pr1462.ll
+++ b/test/CodeGen/X86/pr1462.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1462
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-
diff --git a/test/CodeGen/X86/pr1489.ll b/test/CodeGen/X86/pr1489.ll
index 10fa96a..c9e24bf 100644
--- a/test/CodeGen/X86/pr1489.ll
+++ b/test/CodeGen/X86/pr1489.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llc -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
-; RUN: llvm-as < %s | llc -disable-fp-elim -O0 -mcpu=i486 | grep 3058016715 | count 1
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep -- -1236950581 | count 1
;; magic constants are 3.999f and half of 3.999
; ModuleID = '1489.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
@.str = internal constant [13 x i8] c"%d %d %d %d\0A\00" ; <[13 x i8]*> [#uses=1]
-define i32 @quux() {
+define i32 @quux() nounwind {
entry:
%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -16,7 +16,7 @@ entry:
declare i32 @lrintf(float)
-define i32 @foo() {
+define i32 @foo() nounwind {
entry:
%tmp1 = tail call i32 @lrint( double 3.999000e+00 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -26,7 +26,7 @@ entry:
declare i32 @lrint(double)
-define i32 @bar() {
+define i32 @bar() nounwind {
entry:
%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -34,7 +34,7 @@ entry:
ret i32 %tmp23
}
-define i32 @baz() {
+define i32 @baz() nounwind {
entry:
%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1]
%tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1]
@@ -42,7 +42,7 @@ entry:
ret i32 %tmp23
}
-define i32 @main() {
+define i32 @main() nounwind {
entry:
%tmp = tail call i32 @baz( ) ; <i32> [#uses=1]
%tmp1 = tail call i32 @bar( ) ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/pr1505.ll b/test/CodeGen/X86/pr1505.ll
index e9e3d90..883a806 100644
--- a/test/CodeGen/X86/pr1505.ll
+++ b/test/CodeGen/X86/pr1505.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | not grep fldl
+; RUN: llc < %s -mcpu=i486 | not grep fldl
; PR1505
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index c70e327..12736cd 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 4
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstps | count 3
+; RUN: llc < %s -mcpu=i486 | grep fstpl | count 4
+; RUN: llc < %s -mcpu=i486 | grep fstps | count 3
; PR1505
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/pr2177.ll b/test/CodeGen/X86/pr2177.ll
index b03c990..e941bf7 100644
--- a/test/CodeGen/X86/pr2177.ll
+++ b/test/CodeGen/X86/pr2177.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2177
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index f65725d..f97663c 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {addl \$3, (%eax)} | count 4
+; RUN: llc < %s | grep {addl \$3, (%eax)} | count 4
; PR2182
target datalayout =
diff --git a/test/CodeGen/X86/pr2326.ll b/test/CodeGen/X86/pr2326.ll
index 6cf750c..f82dcb5 100644
--- a/test/CodeGen/X86/pr2326.ll
+++ b/test/CodeGen/X86/pr2326.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sete
+; RUN: llc < %s -march=x86 | grep sete
; PR2326
define i32 @func_59(i32 %p_60) nounwind {
diff --git a/test/CodeGen/X86/pr2623.ll b/test/CodeGen/X86/pr2623.ll
index 51c86b7..5d0eb5d 100644
--- a/test/CodeGen/X86/pr2623.ll
+++ b/test/CodeGen/X86/pr2623.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2623
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 96976b8..afd7114 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
; PR2656
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index 00e6e7b..0760e4c 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
; PR2659
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2849.ll b/test/CodeGen/X86/pr2849.ll
index 673598f..0fec481 100644
--- a/test/CodeGen/X86/pr2849.ll
+++ b/test/CodeGen/X86/pr2849.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2849
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2924.ll b/test/CodeGen/X86/pr2924.ll
index 2cab563..b9e8dc1 100644
--- a/test/CodeGen/X86/pr2924.ll
+++ b/test/CodeGen/X86/pr2924.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR2924
target datalayout =
diff --git a/test/CodeGen/X86/pr2982.ll b/test/CodeGen/X86/pr2982.ll
index f5dc1f4..3f9a595 100644
--- a/test/CodeGen/X86/pr2982.ll
+++ b/test/CodeGen/X86/pr2982.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR2982
target datalayout =
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
index 73f5101..18df97c 100644
--- a/test/CodeGen/X86/pr3154.ll
+++ b/test/CodeGen/X86/pr3154.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mattr=+sse2
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
; PR3154
define void @ff_flac_compute_autocorr_sse2(i32* %data, i32 %len, i32 %lag, double* %autoc) nounwind {
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
index fdc814e..38c9f32 100644
--- a/test/CodeGen/X86/pr3216.ll
+++ b/test/CodeGen/X86/pr3216.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {sar. \$5}
+; RUN: llc < %s -march=x86 | grep {sar. \$5}
@foo = global i8 127
diff --git a/test/CodeGen/X86/pr3241.ll b/test/CodeGen/X86/pr3241.ll
index 665a763f..2f7917b 100644
--- a/test/CodeGen/X86/pr3241.ll
+++ b/test/CodeGen/X86/pr3241.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3241
@g_620 = external global i32
diff --git a/test/CodeGen/X86/pr3243.ll b/test/CodeGen/X86/pr3243.ll
index 7be887b..483b5bf 100644
--- a/test/CodeGen/X86/pr3243.ll
+++ b/test/CodeGen/X86/pr3243.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3243
declare signext i16 @safe_mul_func_int16_t_s_s(i16 signext, i32) nounwind readnone optsize
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
index 0765f86..2598c2f 100644
--- a/test/CodeGen/X86/pr3244.ll
+++ b/test/CodeGen/X86/pr3244.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3244
@g_62 = external global i16 ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/pr3250.ll b/test/CodeGen/X86/pr3250.ll
index dce154f..cccbf54 100644
--- a/test/CodeGen/X86/pr3250.ll
+++ b/test/CodeGen/X86/pr3250.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3250
declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind
diff --git a/test/CodeGen/X86/pr3317.ll b/test/CodeGen/X86/pr3317.ll
index aa5ee7c..9d6626b 100644
--- a/test/CodeGen/X86/pr3317.ll
+++ b/test/CodeGen/X86/pr3317.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
; PR3317
%ArraySInt16 = type { %JavaObject, i8*, [0 x i16] }
diff --git a/test/CodeGen/X86/pr3366.ll b/test/CodeGen/X86/pr3366.ll
index a6f3e92..f813e2e 100644
--- a/test/CodeGen/X86/pr3366.ll
+++ b/test/CodeGen/X86/pr3366.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movzbl
+; RUN: llc < %s -march=x86 | grep movzbl
; PR3366
define void @_ada_c34002a() nounwind {
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index d4a9810..f7af927 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep fstpt
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep fstpt
; PR3457
; rdar://6548010
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
index f67ff75..1372a15 100644
--- a/test/CodeGen/X86/pr3495-2.ll
+++ b/test/CodeGen/X86/pr3495-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
+; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
target triple = "i386-apple-darwin9.6"
%struct.constraintVCGType = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index ca6204c..4b62bf4 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
+; RUN: llc < %s -march=x86 -stats |& grep {Number of reloads omited} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of available reloads turned into copies} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 40
; PR3495
; The loop reversal kicks in once here, resulting in one fewer instruction.
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index f743700..7cdeaa0 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
; PR3522
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
index 4f9a582..e89b507 100644
--- a/test/CodeGen/X86/pre-split1.ll
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
; XFAIL: *
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
index 60297e9..db039bd 100644
--- a/test/CodeGen/X86/pre-split10.ll
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
define i32 @main(i32 %argc, i8** %argv) nounwind {
entry:
diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll
new file mode 100644
index 0000000..0a9f4e3
--- /dev/null
+++ b/test/CodeGen/X86/pre-split11.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
+
+@.str = private constant [28 x i8] c"\0A\0ADOUBLE D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str1 = private constant [37 x i8] c"double to long l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+@.str2 = private constant [35 x i8] c"double to uint ui1 = %u\09\09(0x%x)\0A\00", align 8 ; <[35 x i8]*> [#uses=1]
+@.str3 = private constant [37 x i8] c"double to ulong ul1 = %lu\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+; CHECK: movsd %xmm0, (%rsp)
+entry:
+ %0 = icmp sgt i32 %argc, 4 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb2
+
+bb: ; preds = %entry
+ %1 = getelementptr inbounds i8** %argv, i64 4 ; <i8**> [#uses=1]
+ %2 = load i8** %1, align 8 ; <i8*> [#uses=1]
+ %3 = tail call double @atof(i8* %2) nounwind ; <double> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb, %entry
+ %storemerge = phi double [ %3, %bb ], [ 2.000000e+00, %entry ] ; <double> [#uses=4]
+ %4 = fptoui double %storemerge to i32 ; <i32> [#uses=2]
+ %5 = fptoui double %storemerge to i64 ; <i64> [#uses=2]
+ %6 = fptosi double %storemerge to i64 ; <i64> [#uses=2]
+ %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), double %storemerge) nounwind ; <i32> [#uses=0]
+ %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str1, i64 0, i64 0), i64 %6, i64 %6) nounwind ; <i32> [#uses=0]
+ %9 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0), i32 %4, i32 %4) nounwind ; <i32> [#uses=0]
+ %10 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str3, i64 0, i64 0), i64 %5, i64 %5) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare double @atof(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
index 2009ad8..ba902f9 100644
--- a/test/CodeGen/X86/pre-split2.ll
+++ b/test/CodeGen/X86/pre-split2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | count 2
define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
index f34f144..2e31420 100644
--- a/test/CodeGen/X86/pre-split3.ll
+++ b/test/CodeGen/X86/pre-split3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
index a570f73..10cef27 100644
--- a/test/CodeGen/X86/pre-split4.ll
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
define i32 @main(i32 %argc, i8** %argv) nounwind {
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
index b83003f..8def460 100644
--- a/test/CodeGen/X86/pre-split5.ll
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
target triple = "i386-apple-darwin9.5"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
index e771b80..d38e630 100644
--- a/test/CodeGen/X86/pre-split6.ll
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd 8} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd 8} | count 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
index cd9d205..0b81c0b 100644
--- a/test/CodeGen/X86/pre-split7.ll
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
@object_distance = external global double, align 8 ; <double*> [#uses=1]
@axis_slope_angle = external global double, align 8 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index 2259819..ea4b949 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index 1be960f..c27d925 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index d6517f7..fac5915 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse > %t
+; RUN: llc < %s -march=x86 -mattr=+sse > %t
; RUN: grep prefetchnta %t
; RUN: grep prefetcht0 %t
; RUN: grep prefetcht1 %t
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
index 7478128..8aa744e 100644
--- a/test/CodeGen/X86/private-2.ll
+++ b/test/CodeGen/X86/private-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
; Quote should be outside of private prefix.
; rdar://6855766x
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index caf1035..22b6f35 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -1,9 +1,9 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep .Lfoo:
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep .Lbaz:
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lfoo:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
declare void @foo()
diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll
new file mode 100644
index 0000000..72a428e
--- /dev/null
+++ b/test/CodeGen/X86/ptrtoint-constexpr.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i386-linux | FileCheck %s
+ %union.x = type { i64 }
+
+; CHECK: .globl r
+; CHECK: r:
+; CHECK: .quad ((r) & 4294967295)
+
+@r = global %union.x { i64 ptrtoint (%union.x* @r to i64) }, align 4
diff --git a/test/CodeGen/X86/rdtsc.ll b/test/CodeGen/X86/rdtsc.ll
index f5d947f..f21a44c 100644
--- a/test/CodeGen/X86/rdtsc.ll
+++ b/test/CodeGen/X86/rdtsc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep rdtsc
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rdtsc
+; RUN: llc < %s -march=x86 | grep rdtsc
+; RUN: llc < %s -march=x86-64 | grep rdtsc
declare i64 @llvm.readcyclecounter()
define i64 @foo() {
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index 60e16b0..1ffb4e3 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
-; RUN: not grep subq %t
-; RUN: not grep addq %t
-; RUN: grep {\\-4(%%rsp)} %t | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-red-zone > %t
-; RUN: grep subq %t | count 1
-; RUN: grep addq %t | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; First without noredzone.
+; CHECK: f0:
+; CHECK: -4(%rsp)
+; CHECK: -4(%rsp)
+; CHECK: ret
define x86_fp80 @f0(float %f) nounwind readnone {
entry:
%0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1]
ret x86_fp80 %0
}
+
+; Then with noredzone.
+; CHECK: f1:
+; CHECK: subq $4, %rsp
+; CHECK: (%rsp)
+; CHECK: (%rsp)
+; CHECK: addq $4, %rsp
+; CHECK: ret
+define x86_fp80 @f1(float %f) nounwind readnone noredzone {
+entry:
+ %0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %0
+}
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index dea7d7e..9557d17 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep subq %t | count 1
; RUN: grep addq %t | count 1
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index 6d8cfbb..e0b5f7a 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -1,7 +1,7 @@
;; Both functions in this testcase should codegen to the same function, and
;; neither of them should require spilling anything to the stack.
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
; RUN: not grep {Number of register spills}
;; This can be compiled to use three registers if the loads are not
diff --git a/test/CodeGen/X86/rem-2.ll b/test/CodeGen/X86/rem-2.ll
index 3e17fc0..1b2af4b 100644
--- a/test/CodeGen/X86/rem-2.ll
+++ b/test/CodeGen/X86/rem-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep cltd
+; RUN: llc < %s -march=x86 | not grep cltd
define i32 @test(i32 %X) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll
index bba1f9b..394070e 100644
--- a/test/CodeGen/X86/rem.ll
+++ b/test/CodeGen/X86/rem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep div
+; RUN: llc < %s -march=x86 | not grep div
define i32 @test1(i32 %X) {
%tmp1 = srem i32 %X, 255 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index 8dfed5e..3e81320 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
declare void @bar() nounwind
diff --git a/test/CodeGen/X86/remat-mov-1.ll b/test/CodeGen/X86/remat-mov-1.ll
index 98b7bb4..d71b7a5 100644
--- a/test/CodeGen/X86/remat-mov-1.ll
+++ b/test/CodeGen/X86/remat-mov-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 4294967295 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep -- -1 | grep mov | count 2
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
new file mode 100644
index 0000000..790ae83
--- /dev/null
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
+; RUN: not grep xor %t
+; RUN: not grep movap %t
+; RUN: grep {\\.zero} %t
+
+; Remat should be able to fold the zero constant into the div instructions
+; as a constant-pool load.
+
+define void @foo(double* nocapture %x, double* nocapture %y) nounwind {
+entry:
+ %tmp1 = load double* %x ; <double> [#uses=1]
+ %arrayidx4 = getelementptr inbounds double* %x, i64 1 ; <double*> [#uses=1]
+ %tmp5 = load double* %arrayidx4 ; <double> [#uses=1]
+ %arrayidx8 = getelementptr inbounds double* %x, i64 2 ; <double*> [#uses=1]
+ %tmp9 = load double* %arrayidx8 ; <double> [#uses=1]
+ %arrayidx12 = getelementptr inbounds double* %x, i64 3 ; <double*> [#uses=1]
+ %tmp13 = load double* %arrayidx12 ; <double> [#uses=1]
+ %arrayidx16 = getelementptr inbounds double* %x, i64 4 ; <double*> [#uses=1]
+ %tmp17 = load double* %arrayidx16 ; <double> [#uses=1]
+ %arrayidx20 = getelementptr inbounds double* %x, i64 5 ; <double*> [#uses=1]
+ %tmp21 = load double* %arrayidx20 ; <double> [#uses=1]
+ %arrayidx24 = getelementptr inbounds double* %x, i64 6 ; <double*> [#uses=1]
+ %tmp25 = load double* %arrayidx24 ; <double> [#uses=1]
+ %arrayidx28 = getelementptr inbounds double* %x, i64 7 ; <double*> [#uses=1]
+ %tmp29 = load double* %arrayidx28 ; <double> [#uses=1]
+ %arrayidx32 = getelementptr inbounds double* %x, i64 8 ; <double*> [#uses=1]
+ %tmp33 = load double* %arrayidx32 ; <double> [#uses=1]
+ %arrayidx36 = getelementptr inbounds double* %x, i64 9 ; <double*> [#uses=1]
+ %tmp37 = load double* %arrayidx36 ; <double> [#uses=1]
+ %arrayidx40 = getelementptr inbounds double* %x, i64 10 ; <double*> [#uses=1]
+ %tmp41 = load double* %arrayidx40 ; <double> [#uses=1]
+ %arrayidx44 = getelementptr inbounds double* %x, i64 11 ; <double*> [#uses=1]
+ %tmp45 = load double* %arrayidx44 ; <double> [#uses=1]
+ %arrayidx48 = getelementptr inbounds double* %x, i64 12 ; <double*> [#uses=1]
+ %tmp49 = load double* %arrayidx48 ; <double> [#uses=1]
+ %arrayidx52 = getelementptr inbounds double* %x, i64 13 ; <double*> [#uses=1]
+ %tmp53 = load double* %arrayidx52 ; <double> [#uses=1]
+ %arrayidx56 = getelementptr inbounds double* %x, i64 14 ; <double*> [#uses=1]
+ %tmp57 = load double* %arrayidx56 ; <double> [#uses=1]
+ %arrayidx60 = getelementptr inbounds double* %x, i64 15 ; <double*> [#uses=1]
+ %tmp61 = load double* %arrayidx60 ; <double> [#uses=1]
+ %arrayidx64 = getelementptr inbounds double* %x, i64 16 ; <double*> [#uses=1]
+ %tmp65 = load double* %arrayidx64 ; <double> [#uses=1]
+ %div = fdiv double %tmp1, 0.000000e+00 ; <double> [#uses=1]
+ store double %div, double* %y
+ %div70 = fdiv double %tmp5, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx72 = getelementptr inbounds double* %y, i64 1 ; <double*> [#uses=1]
+ store double %div70, double* %arrayidx72
+ %div74 = fdiv double %tmp9, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx76 = getelementptr inbounds double* %y, i64 2 ; <double*> [#uses=1]
+ store double %div74, double* %arrayidx76
+ %div78 = fdiv double %tmp13, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx80 = getelementptr inbounds double* %y, i64 3 ; <double*> [#uses=1]
+ store double %div78, double* %arrayidx80
+ %div82 = fdiv double %tmp17, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx84 = getelementptr inbounds double* %y, i64 4 ; <double*> [#uses=1]
+ store double %div82, double* %arrayidx84
+ %div86 = fdiv double %tmp21, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx88 = getelementptr inbounds double* %y, i64 5 ; <double*> [#uses=1]
+ store double %div86, double* %arrayidx88
+ %div90 = fdiv double %tmp25, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx92 = getelementptr inbounds double* %y, i64 6 ; <double*> [#uses=1]
+ store double %div90, double* %arrayidx92
+ %div94 = fdiv double %tmp29, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx96 = getelementptr inbounds double* %y, i64 7 ; <double*> [#uses=1]
+ store double %div94, double* %arrayidx96
+ %div98 = fdiv double %tmp33, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx100 = getelementptr inbounds double* %y, i64 8 ; <double*> [#uses=1]
+ store double %div98, double* %arrayidx100
+ %div102 = fdiv double %tmp37, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx104 = getelementptr inbounds double* %y, i64 9 ; <double*> [#uses=1]
+ store double %div102, double* %arrayidx104
+ %div106 = fdiv double %tmp41, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx108 = getelementptr inbounds double* %y, i64 10 ; <double*> [#uses=1]
+ store double %div106, double* %arrayidx108
+ %div110 = fdiv double %tmp45, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx112 = getelementptr inbounds double* %y, i64 11 ; <double*> [#uses=1]
+ store double %div110, double* %arrayidx112
+ %div114 = fdiv double %tmp49, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx116 = getelementptr inbounds double* %y, i64 12 ; <double*> [#uses=1]
+ store double %div114, double* %arrayidx116
+ %div118 = fdiv double %tmp53, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx120 = getelementptr inbounds double* %y, i64 13 ; <double*> [#uses=1]
+ store double %div118, double* %arrayidx120
+ %div122 = fdiv double %tmp57, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx124 = getelementptr inbounds double* %y, i64 14 ; <double*> [#uses=1]
+ store double %div122, double* %arrayidx124
+ %div126 = fdiv double %tmp61, 2.000000e-01 ; <double> [#uses=1]
+ %arrayidx128 = getelementptr inbounds double* %y, i64 15 ; <double*> [#uses=1]
+ store double %div126, double* %arrayidx128
+ %div130 = fdiv double %tmp65, 0.000000e+00 ; <double> [#uses=1]
+ %arrayidx132 = getelementptr inbounds double* %y, i64 16 ; <double*> [#uses=1]
+ store double %div130, double* %arrayidx132
+ ret void
+}
diff --git a/test/CodeGen/X86/ret-addr.ll b/test/CodeGen/X86/ret-addr.ll
index 06a10c6..b7b57ab 100644
--- a/test/CodeGen/X86/ret-addr.ll
+++ b/test/CodeGen/X86/ret-addr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -disable-fp-elim -march=x86 | not grep xor
-; RUN: llvm-as < %s | llc -disable-fp-elim -march=x86-64 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86-64 | not grep xor
define i8* @h() nounwind readnone optsize {
entry:
diff --git a/test/CodeGen/X86/ret-i64-0.ll b/test/CodeGen/X86/ret-i64-0.ll
index c59e4cf..bca0f05 100644
--- a/test/CodeGen/X86/ret-i64-0.ll
+++ b/test/CodeGen/X86/ret-i64-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llc < %s -march=x86 | grep xor | count 2
define i64 @foo() nounwind {
ret i64 0
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 178ff4e..04b57dd 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx,+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2
; rdar://6602459
@g_v1di = external global <1 x i64>
diff --git a/test/CodeGen/X86/rip-rel-address.ll b/test/CodeGen/X86/rip-rel-address.ll
index 2c0926a..24ff07b 100644
--- a/test/CodeGen/X86/rip-rel-address.ll
+++ b/test/CodeGen/X86/rip-rel-address.ll
@@ -1,7 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static | grep {a(%rip)}
+; RUN: llc < %s -march=x86-64 -relocation-model=pic -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=PIC64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=STATIC64
+
+; Use %rip-relative addressing even in static mode on x86-64, because
+; it has a smaller encoding.
@a = internal global double 3.4
define double @foo() nounwind {
%a = load double* @a
ret double %a
+
+; PIC64: movsd _a(%rip), %xmm0
+; STATIC64: movsd a(%rip), %xmm0
}
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index b800e09..276f8bb 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -relocation-model=static | grep rodata | count 3
-; RUN: llvm-as < %s | llc -relocation-model=static | grep -F "rodata.cst" | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep rodata | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.ro" | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel" | count 4
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.local" | count 1
+; RUN: llc < %s -relocation-model=static | grep rodata | count 3
+; RUN: llc < %s -relocation-model=static | grep -F "rodata.cst" | count 2
+; RUN: llc < %s -relocation-model=pic | grep rodata | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro" | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel" | count 4
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.local" | count 1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
index c196ce2..42ece47 100644
--- a/test/CodeGen/X86/rot16.ll
+++ b/test/CodeGen/X86/rot16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep rol %t | count 3
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index 7cebcb8..655ed27 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep rol %t | count 3
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
index 2408359..4e082bb 100644
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep rol %t | count 3
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll
index c567c0d..1e20273 100644
--- a/test/CodeGen/X86/rotate.ll
+++ b/test/CodeGen/X86/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {ro\[rl\]} | count 12
define i32 @rotl32(i32 %A, i8 %Amt) {
diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll
index 40e954c..2eea399 100644
--- a/test/CodeGen/X86/rotate2.ll
+++ b/test/CodeGen/X86/rotate2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rol | count 2
+; RUN: llc < %s -march=x86-64 | grep rol | count 2
define i64 @test1(i64 %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/scalar-extract.ll b/test/CodeGen/X86/scalar-extract.ll
index 172c424..2845838 100644
--- a/test/CodeGen/X86/scalar-extract.ll
+++ b/test/CodeGen/X86/scalar-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+mmx -o %t
; RUN: not grep movq %t
; Check that widening doesn't introduce a mmx register in this case when
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index 6a6283a..fe40758 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -1,20 +1,20 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep min | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep max | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep min | count 1
+; RUN: llc < %s -march=x86-64 | grep max | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
declare float @bar()
-define float @foo(float %a)
+define float @foo(float %a) nounwind
{
%s = call float @bar()
%t = fcmp olt float %s, %a
%u = select i1 %t, float %s, float %a
ret float %u
}
-define float @hem(float %a)
+define float @hem(float %a) nounwind
{
%s = call float @bar()
- %t = fcmp uge float %s, %a
+ %t = fcmp ogt float %s, %a
%u = select i1 %t, float %s, float %a
ret float %u
}
diff --git a/test/CodeGen/X86/scalar_sse_minmax.ll b/test/CodeGen/X86/scalar_sse_minmax.ll
index 8c030b8..bc4ab5d 100644
--- a/test/CodeGen/X86/scalar_sse_minmax.ll
+++ b/test/CodeGen/X86/scalar_sse_minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
; RUN: grep mins | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
; RUN: grep maxs | count 2
declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/X86/scalarize-bitcast.ll b/test/CodeGen/X86/scalarize-bitcast.ll
index a07f939..f6b29ec 100644
--- a/test/CodeGen/X86/scalarize-bitcast.ll
+++ b/test/CodeGen/X86/scalarize-bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
; PR3886
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
index b253dd9..81c919f 100644
--- a/test/CodeGen/X86/scev-interchange.ll
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
- %struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
%"struct.DataOutBase::GmvFlags" = type { i32 }
%"struct.FE_DGPNonparametric<3>" = type { [1156 x i8], i32, %"struct.PolynomialSpace<1>" }
- %"struct.FE_Q<3>" = type { %"struct.FE_DGPNonparametric<3>", %"struct.std::vector<int,std::allocator<int> >" }
%"struct.FiniteElementData<1>" = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.Line = type { [2 x i32] }
%"struct.PolynomialSpace<1>" = type { %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >", i32, %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >" }
@@ -12,9 +10,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
%"struct.TableBase<2,double>" = type { %struct.Subscriptor, double*, i32, %"struct.TableIndices<2>" }
%"struct.TableIndices<2>" = type { %struct.Line }
- %struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
- %struct.pthread_attr_t = type { i64, [48 x i8] }
- %struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
%"struct.std::_Bit_iterator_base" = type { i64*, i32 }
%"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
@@ -34,21 +29,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" }
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_init ; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.DataOutBase::GmvFlags"*, i32)* @pthread_mutexattr_settype ; <i32 (%"struct.DataOutBase::GmvFlags"*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_destroy ; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-
declare void @_Unwind_Resume(i8*)
declare i8* @_Znwm(i64)
@@ -71,7 +51,7 @@ declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vecto
declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32)
-define fastcc void @_ZN4FE_QILi3EEC1Ej(%"struct.FE_Q<3>"* %this, i32 %degree) {
+define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) {
entry:
invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef)
to label %invcont.i unwind label %lpad.i
@@ -356,31 +336,3 @@ lpad204.i: ; preds = %invcont86.i
}
declare fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias nocapture sret, i32)
-
-declare i32 @pthread_once(i32*, void ()*)
-
-declare i8* @pthread_getspecific(i32)
-
-declare i32 @pthread_setspecific(i32, i8*)
-
-declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare i32 @pthread_cancel(i64)
-
-declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)
-
-declare i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare i32 @pthread_key_delete(i32)
-
-declare i32 @pthread_mutexattr_init(%"struct.DataOutBase::GmvFlags"*)
-
-declare i32 @pthread_mutexattr_settype(%"struct.DataOutBase::GmvFlags"*, i32)
-
-declare i32 @pthread_mutexattr_destroy(%"struct.DataOutBase::GmvFlags"*)
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
index 70785e9..c38a020 100644
--- a/test/CodeGen/X86/select-zero-one.ll
+++ b/test/CodeGen/X86/select-zero-one.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep cmov
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movzbl | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
@r1 = weak global i32 0
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index e5d6101..95ed9e9 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep set
+; RUN: llc < %s -march=x86 -mcpu=pentium
+; RUN: llc < %s -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep set
define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
%X = select i1 %A, i1 %B, i1 %C ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll
index 25a2b7e..4a9c1ba 100644
--- a/test/CodeGen/X86/setoeq.ll
+++ b/test/CodeGen/X86/setoeq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep set | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep and
+; RUN: llc < %s -march=x86 | grep set | count 2
+; RUN: llc < %s -march=x86 | grep and
define zeroext i8 @t(double %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/setuge.ll b/test/CodeGen/X86/setuge.ll
index 3f1d882..4ca2f18 100644
--- a/test/CodeGen/X86/setuge.ll
+++ b/test/CodeGen/X86/setuge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep set
+; RUN: llc < %s -march=x86 | not grep set
declare i1 @llvm.isunordered.f32(float, float)
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index a6d1080..c9b39d3 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | grep movsbl
define i32 @foo(i32 %X) nounwind {
entry:
diff --git a/test/CodeGen/X86/sext-ret-val.ll b/test/CodeGen/X86/sext-ret-val.ll
index 946e6c7..da1a187 100644
--- a/test/CodeGen/X86/sext-ret-val.ll
+++ b/test/CodeGen/X86/sext-ret-val.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movzbl | count 1
+; RUN: llc < %s -march=x86 | grep movzbl | count 1
; rdar://6699246
define signext i8 @t1(i8* %A) nounwind readnone ssp {
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
index 839ebc2..4aca040 100644
--- a/test/CodeGen/X86/sext-select.ll
+++ b/test/CodeGen/X86/sext-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movsw
+; RUN: llc < %s -march=x86 | grep movsw
; PR2139
declare void @abort()
diff --git a/test/CodeGen/X86/sext-trunc.ll b/test/CodeGen/X86/sext-trunc.ll
index 97b4666..2eaf425 100644
--- a/test/CodeGen/X86/sext-trunc.ll
+++ b/test/CodeGen/X86/sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
; RUN: grep movsbl %t
; RUN: not grep movz %t
; RUN: not grep and %t
diff --git a/test/CodeGen/X86/sfence.ll b/test/CodeGen/X86/sfence.ll
index fc75ccb..4782879 100644
--- a/test/CodeGen/X86/sfence.ll
+++ b/test/CodeGen/X86/sfence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep sfence
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index b6d78a4..fd278c2 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep and
+; RUN: llc < %s -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86-64 | not grep and
define i32 @t1(i32 %t, i32 %val) nounwind {
%shamt = and i32 %t, 31
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index 4662628..d38f9a8 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {shld.*CL}
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep {mov CL, BL}
; PR687
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
index deb4ed1..4cba183 100644
--- a/test/CodeGen/X86/shift-codegen.ll
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 | \
+; RUN: llc < %s -relocation-model=static -march=x86 | \
; RUN: grep {shll \$3} | count 2
; This should produce two shll instructions, not any lea's.
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index 543bb22..e443ac1 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep shrl
+; RUN: llc < %s | not grep shrl
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll
index 24017fe..5adee7c 100644
--- a/test/CodeGen/X86/shift-double.ll
+++ b/test/CodeGen/X86/shift-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {sh\[lr\]d} | count 5
define i64 @test1(i64 %X, i8 %C) {
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d268232..872817f 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
; RUN: grep {s\[ah\]\[rl\]l} | count 1
define i32* @test1(i32* %P, i32 %X) {
diff --git a/test/CodeGen/X86/shift-i128.ll b/test/CodeGen/X86/shift-i128.ll
index fc22a3c..c4d15ae 100644
--- a/test/CodeGen/X86/shift-i128.ll
+++ b/test/CodeGen/X86/shift-i128.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
define void @t(i128 %x, i128 %a, i128* nocapture %r) nounwind {
entry:
diff --git a/test/CodeGen/X86/shift-i256.ll b/test/CodeGen/X86/shift-i256.ll
index 4a29b86..d5f65a6 100644
--- a/test/CodeGen/X86/shift-i256.ll
+++ b/test/CodeGen/X86/shift-i256.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/shift-one.ll b/test/CodeGen/X86/shift-one.ll
index dd49b7e..0f80f90 100644
--- a/test/CodeGen/X86/shift-one.ll
+++ b/test/CodeGen/X86/shift-one.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep leal
+; RUN: llc < %s -march=x86 | not grep leal
@x = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll
new file mode 100644
index 0000000..ce4f538
--- /dev/null
+++ b/test/CodeGen/X86/shift-parts.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | grep shrdq
+; PR4736
+
+%0 = type { i32, i8, [35 x i8] }
+
+@g_144 = external global %0, align 8 ; <%0*> [#uses=1]
+
+define i32 @int87(i32 %uint64p_8) nounwind {
+entry:
+ %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
+ br label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; <i320> [#uses=1]
+ %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; <i320> [#uses=1]
+ %call3.in = trunc i320 %call3.in.in.in to i32 ; <i32> [#uses=1]
+ %tobool = icmp eq i32 %call3.in, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %for.cond, label %if.then
+
+if.then: ; preds = %for.cond
+ ret i32 1
+}
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index d3616f4..4458891 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl 8(.esp), %eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {shrl .eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {movswl .ax, .eax}
+; RUN: llc < %s -march=x86 | grep {movl 8(.esp), %eax}
+; RUN: llc < %s -march=x86 | grep {shrl .eax}
+; RUN: llc < %s -march=x86 | grep {movswl .ax, .eax}
define i32 @test1(i64 %a) {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/shrink-fp-const1.ll b/test/CodeGen/X86/shrink-fp-const1.ll
index 3406aee..49b9fa3 100644
--- a/test/CodeGen/X86/shrink-fp-const1.ll
+++ b/test/CodeGen/X86/shrink-fp-const1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
; PR1264
define double @foo(double %x) {
diff --git a/test/CodeGen/X86/shrink-fp-const2.ll b/test/CodeGen/X86/shrink-fp-const2.ll
index 7e48b1b..3d5203b 100644
--- a/test/CodeGen/X86/shrink-fp-const2.ll
+++ b/test/CodeGen/X86/shrink-fp-const2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep flds
+; RUN: llc < %s -march=x86 | grep flds
; This should be a flds, not fldt.
define x86_fp80 @test2() nounwind {
entry:
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
index 2721595..13f9329 100644
--- a/test/CodeGen/X86/sincos.ll
+++ b/test/CodeGen/X86/sincos.ll
@@ -1,50 +1,48 @@
; Make sure this testcase codegens to the sin and cos instructions, not calls
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
; RUN: grep sin\$ | count 3
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
; RUN: grep cos\$ | count 3
-declare float @sinf(float)
+declare float @sinf(float) readonly
-declare double @sin(double)
+declare double @sin(double) readonly
-declare x86_fp80 @sinl(x86_fp80)
+declare x86_fp80 @sinl(x86_fp80) readonly
define float @test1(float %X) {
- %Y = call float @sinf(float %X)
+ %Y = call float @sinf(float %X) readonly
ret float %Y
}
define double @test2(double %X) {
- %Y = call double @sin(double %X)
+ %Y = call double @sin(double %X) readonly
ret double %Y
}
define x86_fp80 @test3(x86_fp80 %X) {
- %Y = call x86_fp80 @sinl(x86_fp80 %X)
+ %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
ret x86_fp80 %Y
}
-declare float @cosf(float)
+declare float @cosf(float) readonly
-declare double @cos(double)
+declare double @cos(double) readonly
-declare x86_fp80 @cosl(x86_fp80)
+declare x86_fp80 @cosl(x86_fp80) readonly
define float @test4(float %X) {
- %Y = call float @cosf(float %X)
+ %Y = call float @cosf(float %X) readonly
ret float %Y
}
define double @test5(double %X) {
- %Y = call double @cos(double %X)
+ %Y = call double @cos(double %X) readonly
ret double %Y
}
define x86_fp80 @test6(x86_fp80 %X) {
- %Y = call x86_fp80 @cosl(x86_fp80 %X)
+ %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly
ret x86_fp80 %Y
}
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
new file mode 100644
index 0000000..0f4e63f
--- /dev/null
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Currently, floating-point selects are lowered to CFG triangles.
+; This means that one side of the select is always unconditionally
+; evaluated; however, with MachineSink we can sink the other side so
+; that it's conditionally evaluated.
+
+; CHECK: foo:
+; CHECK-NEXT: divsd
+; CHECK: testb $1, %dil
+; CHECK-NEXT: jne
+; CHECK: divsd
+
+define double @foo(double %x, double %y, i1 %c) nounwind {
+ %a = fdiv double %x, 3.2
+ %b = fdiv double %y, 3.3
+ %z = select i1 %c, double %a, double %b
+ ret double %z
+}
+
+; Hoist floating-point constant-pool loads out of loops.
+
+; CHECK: bar:
+; CHECK: movsd
+; CHECK: align
+define void @bar(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %0 = icmp sgt i64 %n, 0
+ br i1 %0, label %bb, label %return
+
+bb:
+ %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ]
+ %scevgep = getelementptr double* %p, i64 %i.03
+ %1 = load double* %scevgep, align 8
+ %2 = fdiv double 3.200000e+00, %1
+ store double %2, double* %scevgep, align 8
+ %3 = add nsw i64 %i.03, 1
+ %exitcond = icmp eq i64 %3, %n
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index 8b87f74..9ec9182 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep movs
+; RUN: llc < %s | not grep movs
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/smul-with-overflow-2.ll b/test/CodeGen/X86/smul-with-overflow-2.ll
index c3dbfd7..7c23adb 100644
--- a/test/CodeGen/X86/smul-with-overflow-2.ll
+++ b/test/CodeGen/X86/smul-with-overflow-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep add | count 3
+; RUN: llc < %s -march=x86 | grep mul | count 1
+; RUN: llc < %s -march=x86 | grep add | count 3
define i32 @t1(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/smul-with-overflow-3.ll b/test/CodeGen/X86/smul-with-overflow-3.ll
index aa5e67a..49c31f5 100644
--- a/test/CodeGen/X86/smul-with-overflow-3.ll
+++ b/test/CodeGen/X86/smul-with-overflow-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jno} | count 1
+; RUN: llc < %s -march=x86 | grep {jno} | count 1
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 6aefc03..6d125e4 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index 0c697de..a52135d 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -soft-float | not grep xmm
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86 -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
diff --git a/test/CodeGen/X86/split-eh-lpad-edges.ll b/test/CodeGen/X86/split-eh-lpad-edges.ll
index 281ee77..fd40a7f 100644
--- a/test/CodeGen/X86/split-eh-lpad-edges.ll
+++ b/test/CodeGen/X86/split-eh-lpad-edges.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep jmp
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep jmp
; rdar://6647639
%struct.FetchPlanHeader = type { i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)*, %struct.__attributeDescriptionFlags }
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
index 0b7804d..07d4d52 100644
--- a/test/CodeGen/X86/split-select.ll
+++ b/test/CodeGen/X86/split-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep test | count 1
+; RUN: llc < %s -march=x86-64 | grep test | count 1
define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
%x = select i1 %c, <2 x i16> %a, <2 x i16> %b
diff --git a/test/CodeGen/X86/split-vector-rem.ll b/test/CodeGen/X86/split-vector-rem.ll
index 8c88769..681c6b0 100644
--- a/test/CodeGen/X86/split-vector-rem.ll
+++ b/test/CodeGen/X86/split-vector-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 16
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fmodf | count 8
+; RUN: llc < %s -march=x86-64 | grep div | count 16
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 8
define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
%m = srem <8 x i32> %t, %u
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
index 30e5af4..b945530 100644
--- a/test/CodeGen/X86/sret.ll
+++ b/test/CodeGen/X86/sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep ret | grep 4
+; RUN: llc < %s -march=x86 | grep ret | grep 4
%struct.foo = type { [4 x i32] }
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index 5a888b2..b12a87d 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-1.ll b/test/CodeGen/X86/sse-align-1.ll
index 0edc6e0..c7a5cd5 100644
--- a/test/CodeGen/X86/sse-align-1.ll
+++ b/test/CodeGen/X86/sse-align-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
define <4 x float> @foo(<4 x float>* %p) nounwind {
%t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-10.ll b/test/CodeGen/X86/sse-align-10.ll
index 1a23eb2..0f91697 100644
--- a/test/CodeGen/X86/sse-align-10.ll
+++ b/test/CodeGen/X86/sse-align-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
define <2 x i64> @bar(<2 x i64>* %p) nounwind {
%t = load <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index a10b102..aa1b437 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=linux | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=linux | grep movups
define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
entry:
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 297f1c4..4f025b9 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep unpck %t | count 2
; RUN: grep shuf %t | count 2
; RUN: grep ps %t | count 4
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index ba693a2..102c3fb 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 5bbcd59..c42f7f0 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
store <4 x float> %x, <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-4.ll b/test/CodeGen/X86/sse-align-4.ll
index f7e5fe3..4c59934 100644
--- a/test/CodeGen/X86/sse-align-4.ll
+++ b/test/CodeGen/X86/sse-align-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
store <4 x float> %x, <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-align-5.ll b/test/CodeGen/X86/sse-align-5.ll
index 19e0eaf..21cd231 100644
--- a/test/CodeGen/X86/sse-align-5.ll
+++ b/test/CodeGen/X86/sse-align-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
define <2 x i64> @bar(<2 x i64>* %p) nounwind {
%t = load <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-align-6.ll b/test/CodeGen/X86/sse-align-6.ll
index dace291..0bbf4228 100644
--- a/test/CodeGen/X86/sse-align-6.ll
+++ b/test/CodeGen/X86/sse-align-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
%t = load <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-7.ll b/test/CodeGen/X86/sse-align-7.ll
index 7fb65b5..5784481 100644
--- a/test/CodeGen/X86/sse-align-7.ll
+++ b/test/CodeGen/X86/sse-align-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
store <2 x i64> %x, <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-align-8.ll b/test/CodeGen/X86/sse-align-8.ll
index 17a3d29..cfeff81 100644
--- a/test/CodeGen/X86/sse-align-8.ll
+++ b/test/CodeGen/X86/sse-align-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
store <2 x i64> %x, <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-9.ll b/test/CodeGen/X86/sse-align-9.ll
index 24b437a..cb26b95 100644
--- a/test/CodeGen/X86/sse-align-9.ll
+++ b/test/CodeGen/X86/sse-align-9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
define <4 x float> @foo(<4 x float>* %p) nounwind {
%t = load <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index d8c3283..0e0e4a9 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep test
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep test
define float @tst1(float %a, float %b) {
%tmp = tail call float @copysignf( float %b, float %a )
diff --git a/test/CodeGen/X86/sse-load-ret.ll b/test/CodeGen/X86/sse-load-ret.ll
index cbf3eb0..1ebcb1a 100644
--- a/test/CodeGen/X86/sse-load-ret.ll
+++ b/test/CodeGen/X86/sse-load-ret.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mcpu=yonah | not grep movss
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86 -mcpu=yonah | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
define double @test1(double* %P) {
%X = load double* %P ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
new file mode 100644
index 0000000..17ffb5e
--- /dev/null
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -0,0 +1,392 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Some of these patterns can be matched as SSE min or max. Some of
+; them can be matched provided that the operands are swapped.
+; Some of them can't be matched at all and require a comparison
+; and a conditional branch.
+
+; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; x_ : use 0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+; CHECK: ogt:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt(double %x, double %y) nounwind {
+ %c = fcmp ogt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: olt:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt(double %x, double %y) nounwind {
+ %c = fcmp olt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ogt_inverse:
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt_inverse(double %x, double %y) nounwind {
+ %c = fcmp ogt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: olt_inverse:
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt_inverse(double %x, double %y) nounwind {
+ %c = fcmp olt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: oge:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge(double %x, double %y) nounwind {
+ %c = fcmp oge double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ole:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole(double %x, double %y) nounwind {
+ %c = fcmp ole double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: oge_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge_inverse(double %x, double %y) nounwind {
+ %c = fcmp oge double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ole_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole_inverse(double %x, double %y) nounwind {
+ %c = fcmp ole double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: x_ogt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt(double %x) nounwind {
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_olt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt(double %x) nounwind {
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ogt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt_inverse(double %x) nounwind {
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_olt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt_inverse(double %x) nounwind {
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_oge:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge(double %x) nounwind {
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ole:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole(double %x) nounwind {
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_oge_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge_inverse(double %x) nounwind {
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ole_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole_inverse(double %x) nounwind {
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: ugt:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt(double %x, double %y) nounwind {
+ %c = fcmp ugt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ult:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult(double %x, double %y) nounwind {
+ %c = fcmp ult double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ugt_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt_inverse(double %x, double %y) nounwind {
+ %c = fcmp ugt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ult_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult_inverse(double %x, double %y) nounwind {
+ %c = fcmp ult double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: uge:
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge(double %x, double %y) nounwind {
+ %c = fcmp uge double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ule:
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule(double %x, double %y) nounwind {
+ %c = fcmp ule double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: uge_inverse:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge_inverse(double %x, double %y) nounwind {
+ %c = fcmp uge double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ule_inverse:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule_inverse(double %x, double %y) nounwind {
+ %c = fcmp ule double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: x_ugt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt(double %x) nounwind {
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ult:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult(double %x) nounwind {
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ugt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt_inverse(double %x) nounwind {
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ult_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult_inverse(double %x) nounwind {
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_uge:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge(double %x) nounwind {
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ule:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule(double %x) nounwind {
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_uge_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge_inverse(double %x) nounwind {
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ule_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule_inverse(double %x) nounwind {
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; Test a few more misc. cases.
+
+; CHECK: clampTo3k_a:
+; CHECK: minsd
+define double @clampTo3k_a(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_b:
+; CHECK: minsd
+define double @clampTo3k_b(double %x) nounwind readnone {
+entry:
+ %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_c:
+; CHECK: maxsd
+define double @clampTo3k_c(double %x) nounwind readnone {
+entry:
+ %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_d:
+; CHECK: maxsd
+define double @clampTo3k_d(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_e:
+; CHECK: maxsd
+define double @clampTo3k_e(double %x) nounwind readnone {
+entry:
+ %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_f:
+; CHECK: maxsd
+define double @clampTo3k_f(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_g:
+; CHECK: minsd
+define double @clampTo3k_g(double %x) nounwind readnone {
+entry:
+ %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_h:
+; CHECK: minsd
+define double @clampTo3k_h(double %x) nounwind readnone {
+entry:
+ %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
+ %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+ ret double %x_addr.0
+}
diff --git a/test/CodeGen/X86/sse-varargs.ll b/test/CodeGen/X86/sse-varargs.ll
index 806126d..da38f0e 100644
--- a/test/CodeGen/X86/sse-varargs.ll
+++ b/test/CodeGen/X86/sse-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xmm | grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xmm | grep esp
define i32 @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
new file mode 100644
index 0000000..9f926f2
--- /dev/null
+++ b/test/CodeGen/X86/sse2.ll
@@ -0,0 +1,34 @@
+; Tests for SSE2 and below, without SSE3+.
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 | FileCheck %s
+
+define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+ %tmp3 = load <2 x double>* %A, align 16
+ %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+ %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+ store <2 x double> %tmp9, <2 x double>* %r, align 16
+ ret void
+
+; CHECK: t1:
+; CHECK: movl 8(%esp), %eax
+; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: movlpd 12(%esp), %xmm0
+; CHECK-NEXT: movl 4(%esp), %eax
+; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: ret
+}
+
+define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+ %tmp3 = load <2 x double>* %A, align 16
+ %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+ %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
+ store <2 x double> %tmp9, <2 x double>* %r, align 16
+ ret void
+
+; CHECK: t2:
+; CHECK: movl 8(%esp), %eax
+; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: movhpd 12(%esp), %xmm0
+; CHECK-NEXT: movl 4(%esp), %eax
+; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
new file mode 100644
index 0000000..703635c
--- /dev/null
+++ b/test/CodeGen/X86/sse3.ll
@@ -0,0 +1,273 @@
+; These are tests for SSE3 codegen. Yonah has SSE3 and earlier but not SSSE3+.
+
+; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9\
+; RUN: | FileCheck %s --check-prefix=X64
+
+; Test for v8xi16 lowering where we extract the first element of the vector and
+; place it in the second element of the result.
+
+define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
+entry:
+ %tmp3 = load <8 x i16>* %old
+ %tmp6 = shufflevector <8 x i16> %tmp3,
+ <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+ <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+ store <8 x i16> %tmp6, <8 x i16>* %dest
+ ret void
+
+; X64: t0:
+; X64: movddup (%rsi), %xmm0
+; X64: pshuflw $0, %xmm0, %xmm0
+; X64: xorl %eax, %eax
+; X64: pinsrw $0, %eax, %xmm0
+; X64: movaps %xmm0, (%rdi)
+; X64: ret
+}
+
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp3
+
+; X64: t1:
+; X64: movl (%rsi), %eax
+; X64: movaps (%rdi), %xmm0
+; X64: pinsrw $0, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
+; X64: t2:
+; X64: pextrw $1, %xmm1, %eax
+; X64: pinsrw $0, %eax, %xmm0
+; X64: pinsrw $3, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+ ret <8 x i16> %tmp
+; X64: t3:
+; X64: pextrw $5, %xmm0, %eax
+; X64: pshuflw $44, %xmm0, %xmm0
+; X64: pshufhw $27, %xmm0, %xmm0
+; X64: pinsrw $3, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+ ret <8 x i16> %tmp
+; X64: t4:
+; X64: pextrw $7, %xmm0, %eax
+; X64: pshufhw $100, %xmm0, %xmm1
+; X64: pinsrw $1, %eax, %xmm1
+; X64: pextrw $1, %xmm0, %eax
+; X64: movaps %xmm1, %xmm0
+; X64: pinsrw $4, %eax, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
+ ret <8 x i16> %tmp
+; X64: t5:
+; X64: movlhps %xmm1, %xmm0
+; X64: pshufd $114, %xmm0, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
+; X64: t6:
+; X64: movss %xmm1, %xmm0
+; X64: ret
+}
+
+define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
+ ret <8 x i16> %tmp
+; X64: t7:
+; X64: pshuflw $-80, %xmm0, %xmm0
+; X64: pshufhw $-56, %xmm0, %xmm0
+; X64: ret
+}
+
+define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
+ %tmp = load <2 x i64>* %A
+ %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
+ %tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
+ %tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
+ %tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
+ %tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
+ %tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
+ %tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
+ %tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
+ %tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
+ %tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
+ %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
+ %tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
+ %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
+ %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
+ %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
+ %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
+ %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
+ %tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
+ store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
+ ret void
+; X64: t8:
+; X64: pshuflw $-58, (%rsi), %xmm0
+; X64: pshufhw $-58, %xmm0, %xmm0
+; X64: movaps %xmm0, (%rdi)
+; X64: ret
+}
+
+define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
+ %tmp = load <4 x float>* %r
+ %tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
+ %tmp.upgrd.4 = load double* %tmp.upgrd.3
+ %tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
+ %tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
+ %tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
+ %tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0
+ %tmp7 = extractelement <4 x float> %tmp, i32 1
+ %tmp8 = extractelement <4 x float> %tmp6, i32 0
+ %tmp9 = extractelement <4 x float> %tmp6, i32 1
+ %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
+ %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
+ store <4 x float> %tmp13, <4 x float>* %r
+ ret void
+; X64: t9:
+; X64: movsd (%rsi), %xmm0
+; X64: movhps %xmm0, (%rdi)
+; X64: ret
+}
+
+
+
+; FIXME: This testcase produces icky code. It can be made much better!
+; PR2585
+
+@g1 = external constant <4 x i32>
+@g2 = external constant <4 x i16>
+
+define internal void @t10() nounwind {
+ load <4 x i32>* @g1, align 16
+ bitcast <4 x i32> %1 to <8 x i16>
+ shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
+ bitcast <8 x i16> %3 to <2 x i64>
+ extractelement <2 x i64> %4, i32 0
+ bitcast i64 %5 to <4 x i16>
+ store <4 x i16> %6, <4 x i16>* @g2, align 8
+ ret void
+; X64: t10:
+; X64: movq _g1@GOTPCREL(%rip), %rax
+; X64: movaps (%rax), %xmm0
+; X64: pextrw $4, %xmm0, %eax
+; X64: movaps %xmm0, %xmm1
+; X64: movlhps %xmm1, %xmm1
+; X64: pshuflw $8, %xmm1, %xmm1
+; X64: pinsrw $2, %eax, %xmm1
+; X64: pextrw $6, %xmm0, %eax
+; X64: pinsrw $3, %eax, %xmm1
+; X64: movq _g2@GOTPCREL(%rip), %rax
+; X64: movq %xmm1, (%rax)
+; X64: ret
+}
+
+
+; Pack various elements via shuffles.
+define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ ret <8 x i16> %tmp7
+
+; X64: t11:
+; X64: movd %xmm1, %eax
+; X64: movlhps %xmm0, %xmm0
+; X64: pshuflw $1, %xmm0, %xmm0
+; X64: pinsrw $1, %eax, %xmm0
+; X64: ret
+}
+
+
+define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
+ ret <8 x i16> %tmp9
+
+; X64: t12:
+; X64: pextrw $3, %xmm1, %eax
+; X64: movlhps %xmm0, %xmm0
+; X64: pshufhw $3, %xmm0, %xmm0
+; X64: pinsrw $5, %eax, %xmm0
+; X64: ret
+}
+
+
+define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
+ ret <8 x i16> %tmp9
+; X64: t13:
+; X64: punpcklqdq %xmm0, %xmm1
+; X64: pextrw $3, %xmm1, %eax
+; X64: pshufd $52, %xmm1, %xmm0
+; X64: pinsrw $4, %eax, %xmm0
+; X64: ret
+}
+
+
+define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
+ ret <8 x i16> %tmp9
+; X64: t14:
+; X64: punpcklqdq %xmm0, %xmm1
+; X64: pshufhw $8, %xmm1, %xmm0
+; X64: ret
+}
+
+
+
+define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+ %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+ ret <8 x i16> %tmp8
+; X64: t15:
+; X64: pextrw $7, %xmm0, %eax
+; X64: punpcklqdq %xmm1, %xmm0
+; X64: pshuflw $-128, %xmm0, %xmm0
+; X64: pinsrw $2, %eax, %xmm0
+; X64: ret
+}
+
+
+; Test yonah where we convert a shuffle to pextrw and pinsrw
+define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
+entry:
+ %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ ret <16 x i8> %tmp9
+; X64: t16:
+; X64: movaps LCPI17_0(%rip), %xmm1
+; X64: movd %xmm1, %eax
+; X64: pinsrw $0, %eax, %xmm1
+; X64: pextrw $8, %xmm0, %eax
+; X64: pinsrw $1, %eax, %xmm1
+; X64: pextrw $1, %xmm1, %ecx
+; X64: movd %xmm1, %edx
+; X64: pinsrw $0, %edx, %xmm1
+; X64: movzbl %cl, %ecx
+; X64: andw $-256, %ax
+; X64: orw %cx, %ax
+; X64: movaps %xmm1, %xmm0
+; X64: pinsrw $1, %eax, %xmm0
+; X64: ret
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
new file mode 100644
index 0000000..a734c05
--- /dev/null
+++ b/test/CodeGen/X86/sse41.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+
+@g16 = external global i16
+
+define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
+ %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+ ret <4 x i32> %tmp1
+; X32: pinsrd_1:
+; X32: pinsrd $1, 4(%esp), %xmm0
+
+; X64: pinsrd_1:
+; X64: pinsrd $1, %edi, %xmm0
+}
+
+define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
+ %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
+ ret <16 x i8> %tmp1
+; X32: pinsrb_1:
+; X32: pinsrb $1, 4(%esp), %xmm0
+
+; X64: pinsrb_1:
+; X64: pinsrb $1, %edi, %xmm0
+}
+
+
+define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
+entry:
+ %0 = load i32* %p, align 4
+ %1 = insertelement <4 x i32> undef, i32 %0, i32 0
+ %2 = insertelement <4 x i32> %1, i32 0, i32 1
+ %3 = insertelement <4 x i32> %2, i32 0, i32 2
+ %4 = insertelement <4 x i32> %3, i32 0, i32 3
+ %5 = bitcast <4 x i32> %4 to <16 x i8>
+ %6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
+ %7 = bitcast <4 x i32> %6 to <2 x i64>
+ ret <2 x i64> %7
+
+; X32: _pmovsxbd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxbd (%eax), %xmm0
+
+; X64: _pmovsxbd_1:
+; X64: pmovsxbd (%rdi), %xmm0
+}
+
+define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
+entry:
+ %0 = load i64* %p ; <i64> [#uses=1]
+ %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1]
+ %1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1]
+ %3 = bitcast <4 x i32> %2 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %3
+
+; X32: _pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd (%eax), %xmm0
+
+; X64: _pmovsxwd_1:
+; X64: pmovsxwd (%rdi), %xmm0
+}
+
+
+
+
+define <2 x i64> @pmovzxbq_1() nounwind {
+entry:
+ %0 = load i16* @g16, align 2 ; <i16> [#uses=1]
+ %1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1]
+ %2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1]
+ %3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %3
+
+; X32: _pmovzxbq_1:
+; X32: movl L_g16$non_lazy_ptr, %eax
+; X32: pmovzxbq (%eax), %xmm0
+
+; X64: _pmovzxbq_1:
+; X64: movq _g16@GOTPCREL(%rip), %rax
+; X64: pmovzxbq (%rax), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+
+
+define i32 @extractps_1(<4 x float> %v) nounwind {
+ %s = extractelement <4 x float> %v, i32 3
+ %i = bitcast float %s to i32
+ ret i32 %i
+
+; X32: _extractps_1:
+; X32: extractps $3, %xmm0, %eax
+
+; X64: _extractps_1:
+; X64: extractps $3, %xmm0, %eax
+}
+define i32 @extractps_2(<4 x float> %v) nounwind {
+ %t = bitcast <4 x float> %v to <4 x i32>
+ %s = extractelement <4 x i32> %t, i32 3
+ ret i32 %s
+
+; X32: _extractps_2:
+; X32: extractps $3, %xmm0, %eax
+
+; X64: _extractps_2:
+; X64: extractps $3, %xmm0, %eax
+}
+
+
+; The non-store form of extractps puts its result into a GPR.
+; This makes it suitable for an extract from a <4 x float> that
+; is bitcasted to i32, but unsuitable for much of anything else.
+
+define float @ext_1(<4 x float> %v) nounwind {
+ %s = extractelement <4 x float> %v, i32 3
+ %t = fadd float %s, 1.0
+ ret float %t
+
+; X32: _ext_1:
+; X32: pshufd $3, %xmm0, %xmm0
+; X32: addss LCPI8_0, %xmm0
+
+; X64: _ext_1:
+; X64: pshufd $3, %xmm0, %xmm0
+; X64: addss LCPI8_0(%rip), %xmm0
+}
+define float @ext_2(<4 x float> %v) nounwind {
+ %s = extractelement <4 x float> %v, i32 3
+ ret float %s
+
+; X32: _ext_2:
+; X32: pshufd $3, %xmm0, %xmm0
+
+; X64: _ext_2:
+; X64: pshufd $3, %xmm0, %xmm0
+}
+define i32 @ext_3(<4 x i32> %v) nounwind {
+ %i = extractelement <4 x i32> %v, i32 3
+ ret i32 %i
+
+; X32: _ext_3:
+; X32: pextrd $3, %xmm0, %eax
+
+; X64: _ext_3:
+; X64: pextrd $3, %xmm0, %eax
+}
+
+define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
+ ret <4 x float> %tmp1
+; X32: _insertps_1:
+; X32: insertps $1, %xmm1, %xmm0
+
+; X64: _insertps_1:
+; X64: insertps $1, %xmm1, %xmm0
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind {
+ %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+ ret <4 x float> %tmp1
+; X32: _insertps_2:
+; X32: insertps $0, 4(%esp), %xmm0
+
+; X64: _insertps_2:
+; X64: insertps $0, %xmm1, %xmm0
+}
+
+define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp2 = extractelement <4 x float> %t2, i32 0
+ %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
+ ret <4 x float> %tmp1
+; X32: _insertps_3:
+; X32: insertps $0, %xmm1, %xmm0
+
+; X64: _insertps_3:
+; X64: insertps $0, %xmm1, %xmm0
+}
+
+define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+ ret i32 %tmp1
+; X32: _ptestz_1:
+; X32: ptest %xmm1, %xmm0
+; X32: sete %al
+
+; X64: _ptestz_1:
+; X64: ptest %xmm1, %xmm0
+; X64: sete %al
+}
+
+define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+ ret i32 %tmp1
+; X32: _ptestz_2:
+; X32: ptest %xmm1, %xmm0
+; X32: setb %al
+
+; X64: _ptestz_2:
+; X64: ptest %xmm1, %xmm0
+; X64: setb %al
+}
+
+define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+ ret i32 %tmp1
+; X32: _ptestz_3:
+; X32: ptest %xmm1, %xmm0
+; X32: seta %al
+
+; X64: _ptestz_3:
+; X64: ptest %xmm1, %xmm0
+; X64: seta %al
+}
+
+
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
new file mode 100644
index 0000000..c9c4d01
--- /dev/null
+++ b/test/CodeGen/X86/sse42.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+
+declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind
+
+define i32 @crc32_8(i32 %a, i8 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
+ ret i32 %tmp
+; X32: _crc32_8:
+; X32: crc32 8(%esp), %eax
+
+; X64: _crc32_8:
+; X64: crc32 %sil, %eax
+}
+
+
+define i32 @crc32_16(i32 %a, i16 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
+ ret i32 %tmp
+; X32: _crc32_16:
+; X32: crc32 8(%esp), %eax
+
+; X64: _crc32_16:
+; X64: crc32 %si, %eax
+}
+
+
+define i32 @crc32_32(i32 %a, i32 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
+ ret i32 %tmp
+; X32: _crc32_32:
+; X32: crc32 8(%esp), %eax
+
+; X64: _crc32_32:
+; X64: crc32 %esi, %eax
+}
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index 547763e..dc3d6fe 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
+; RUN: llc < %s -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
; RUN: grep fail | count 1
declare float @test_f(float %f)
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index dda6f0d..cb65e9b 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
+; RUN: llc < %s -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
; fold the load into the andpd.
diff --git a/test/CodeGen/X86/stack-color-with-reg-2.ll b/test/CodeGen/X86/stack-color-with-reg-2.ll
index bc4182f..c1f2672 100644
--- a/test/CodeGen/X86/stack-color-with-reg-2.ll
+++ b/test/CodeGen/X86/stack-color-with-reg-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
%"struct..0$_67" = type { i32, %"struct.llvm::MachineOperand"**, %"struct.llvm::MachineOperand"* }
%"struct..1$_69" = type { i32 }
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 72a985a..672f77e 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep stackcoloring %t | grep "loads eliminated"
-; RUN: grep stackcoloring %t | grep "stores eliminated"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
+; RUN: grep asm-printer %t | grep 179
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
new file mode 100644
index 0000000..9778fa1
--- /dev/null
+++ b/test/CodeGen/X86/stdarg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep {testb \[%\]al, \[%\]al}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define void @foo(i32 %x, ...) nounwind {
+entry:
+ %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
+ %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
+ call void @llvm.va_start(i8* %ap12)
+ %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+ call void @bar(%struct.__va_list_tag* %ap3) nounwind
+ call void @llvm.va_end(i8* %ap12)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
new file mode 100644
index 0000000..37f86c6
--- /dev/null
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; Don't crash on an empty struct member.
+
+; CHECK: movl $2, 4(%esp)
+; CHECK: movl $1, (%esp)
+
+%testType = type {i32, [0 x i32], i32}
+
+define void @foo() nounwind {
+ %1 = alloca %testType
+ volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+ ret void
+}
diff --git a/test/CodeGen/X86/store-fp-constant.ll b/test/CodeGen/X86/store-fp-constant.ll
index 70cb046..206886b 100644
--- a/test/CodeGen/X86/store-fp-constant.ll
+++ b/test/CodeGen/X86/store-fp-constant.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep rodata
-; RUN: llvm-as < %s | llc -march=x86 | not grep literal
+; RUN: llc < %s -march=x86 | not grep rodata
+; RUN: llc < %s -march=x86 | not grep literal
;
; Check that no FP constants in this testcase end up in the
; constant pool.
diff --git a/test/CodeGen/X86/store-global-address.ll b/test/CodeGen/X86/store-global-address.ll
index 0695eee..c8d4cbc 100644
--- a/test/CodeGen/X86/store-global-address.ll
+++ b/test/CodeGen/X86/store-global-address.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movl | count 1
+; RUN: llc < %s -march=x86 | grep movl | count 1
@dst = global i32 0 ; <i32*> [#uses=1]
@ptr = global i32* null ; <i32**> [#uses=1]
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
index acef174..66d0e47 100644
--- a/test/CodeGen/X86/store_op_load_fold.ll
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
;
; Test the add and load are folded into the store instruction.
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 09aaba1..0ccfe47 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {and DWORD PTR} | count 2
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/storetrunc-fp.ll b/test/CodeGen/X86/storetrunc-fp.ll
index 945cf48..03ad093 100644
--- a/test/CodeGen/X86/storetrunc-fp.ll
+++ b/test/CodeGen/X86/storetrunc-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep flds
+; RUN: llc < %s -march=x86 | not grep flds
define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) {
%c = fadd x86_fp80 %a, %b
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index cc26487..7aae9eb 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
; P should be sunk into the loop and folded into the address mode. There
; shouldn't be any lea instructions inside the loop.
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 277a443..a99a9c9 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
@B = external global [1000 x float], align 32
@A = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 98f0252..19f4079 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep {jb} | count 1
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jb} | count 1
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/subreg-to-reg-0.ll b/test/CodeGen/X86/subreg-to-reg-0.ll
index 6b60f65..d718c85 100644
--- a/test/CodeGen/X86/subreg-to-reg-0.ll
+++ b/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
; Do eliminate the zero-extension instruction and rely on
; x86-64's implicit zero-extension!
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index aa26f06..a297728 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal .*), %e.\*} | count 1
+; RUN: llc < %s -march=x86-64 | grep {leal .*), %e.\*} | count 1
; Don't eliminate or coalesce away the explicit zero-extension!
; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-2.ll b/test/CodeGen/X86/subreg-to-reg-2.ll
index d0b40cd..49d2e88 100644
--- a/test/CodeGen/X86/subreg-to-reg-2.ll
+++ b/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl
; rdar://6707985
%XXOO = type { %"struct.XXC::XXCC", i8*, %"struct.XXC::XXOO::$_71" }
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll
index 6634538..931ae75 100644
--- a/test/CodeGen/X86/subreg-to-reg-3.ll
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep imull
+; RUN: llc < %s -march=x86-64 | grep imull
; Don't eliminate or coalesce away the explicit zero-extension!
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
index bb6af39..0ea5541 100644
--- a/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep leaq %t
; RUN: not grep incq %t
; RUN: not grep decq %t
diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll
index 81b262a..ba4c307 100644
--- a/test/CodeGen/X86/subreg-to-reg-5.ll
+++ b/test/CodeGen/X86/subreg-to-reg-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
; RUN: grep addl %t
; RUN: not egrep {movl|movq} %t
diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll
index f18eef7..76430cd 100644
--- a/test/CodeGen/X86/subreg-to-reg-6.ll
+++ b/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
define i64 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
index f3c701f..55425bc 100644
--- a/test/CodeGen/X86/switch-zextload.ll
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
; Do zextload, instead of a load and a separate zext.
diff --git a/test/CodeGen/X86/swizzle.ll b/test/CodeGen/X86/swizzle.ll
index d00bb9a..23e0c24 100644
--- a/test/CodeGen/X86/swizzle.ll
+++ b/test/CodeGen/X86/swizzle.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlps
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movups
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movlps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movups
; rdar://6523650
%struct.vector4_t = type { <4 x float> }
diff --git a/test/CodeGen/X86/tailcall-i1.ll b/test/CodeGen/X86/tailcall-i1.ll
index 0ec6a77..8ef1f11 100644
--- a/test/CodeGen/X86/tailcall-i1.ll
+++ b/test/CodeGen/X86/tailcall-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc i1 @i1test(i32, i32, i32, i32) {
entry:
%4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index ff960b8..110472c 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12
+; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12
; Linux has 8 byte alignment so the params cause stack size 20 when tailcallopt
; is enabled, ensure that a normal fastcc call has matching stack size
diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll
index e94d7d8..d8be4b2 100644
--- a/test/CodeGen/X86/tailcall-structret.ll
+++ b/test/CodeGen/X86/tailcall-structret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
entry:
%2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
diff --git a/test/CodeGen/X86/tailcall-void.ll b/test/CodeGen/X86/tailcall-void.ll
index 27b2a285..4e578d1 100644
--- a/test/CodeGen/X86/tailcall-void.ll
+++ b/test/CodeGen/X86/tailcall-void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc void @i1test(i32, i32, i32, i32) {
entry:
tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll
index deedb86..a4f87c0 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 916be56..7002560 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7b65863..7c685b8 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep TAILCALL
; Expect 2 rep;movs because of tail call byval lowering.
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep rep | wc -l | grep 2
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep rep | wc -l | grep 2
; A sequence of copyto/copyfrom virtual registers is used to deal with byval
; lowering appearing after moving arguments to registers. The following two
; checks verify that the register allocator changes those sequences to direct
; moves to argument register where it can (for registers that are not used in
; byval lowering - not rsi, not rdi, not rcx).
; Expect argument 4 to be moved directly to register edx.
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep movl | grep {7} | grep edx
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep movl | grep {7} | grep edx
; Expect argument 6 to be moved directly to register r8.
-; RUN: llvm-as < %s | llc -march=x86-64 -tailcallopt | grep movl | grep {17} | grep r8
+; RUN: llc < %s -march=x86-64 -tailcallopt | grep movl | grep {17} | grep r8
%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll
index f614935..c0b609a 100644
--- a/test/CodeGen/X86/tailcallfp.ll
+++ b/test/CodeGen/X86/tailcallfp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
%Y = tail call fastcc i32 %FP(double 0.0, i32 %X)
ret i32 %Y
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 151701e..be4f96c 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
+; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
index 54074eb..60e3be5 100644
--- a/test/CodeGen/X86/tailcallpic1.ll
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
+; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
index 60818e4..eaa7631 100644
--- a/test/CodeGen/X86/tailcallpic2.ll
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
+; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c81327e..73c59bb 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,14 +1,17 @@
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 | grep TAILCALL
+; RUN: llc < %s -tailcallopt -march=x86-64 | FileCheck %s
+
; Check that lowered arguments on the stack do not overwrite each other.
-; Move param %in1 to temp register (%eax).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl 40(%rsp), %eax}
-; Add %in1 %p1 to another temporary register (%r9d).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r10d}
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r10d}
+; Add %in1 and %p1 into a different temporary register (%eax).
+; CHECK: movl %edi, %eax
+; CHECK: addl 32(%rsp), %eax
+; Move param %in1 to temp register (%r10d).
+; CHECK: movl 40(%rsp), %r10d
; Move result of addition to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r10d, 40(%rsp)}
+; CHECK: movl %eax, 40(%rsp)
; Move param %in2 to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %eax, 32(%rsp)}
+; CHECK: movl %r10d, 32(%rsp)
+; Eventually, do a TAILCALL
+; CHECK: TAILCALL
declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b)
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index a24a9a0..772ff6c 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
; rdar://5752025
; We don't want to fold the and into the test, because the and clobbers its
diff --git a/test/CodeGen/X86/test-shrink-bug.ll b/test/CodeGen/X86/test-shrink-bug.ll
new file mode 100644
index 0000000..64631ea
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink-bug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+
+; Codegen shouldn't reduce the comparison down to testb $-1, %al
+; because that changes the result of the signed test.
+; PR5132
+; CHECK: testw $255, %ax
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@g_14 = global i8 -6, align 1 ; <i8*> [#uses=1]
+
+declare i32 @func_16(i8 signext %p_19, i32 %p_20) nounwind
+
+define i32 @func_35(i64 %p_38) nounwind ssp {
+entry:
+ %tmp = load i8* @g_14 ; <i8> [#uses=2]
+ %conv = zext i8 %tmp to i32 ; <i32> [#uses=1]
+ %cmp = icmp sle i32 1, %conv ; <i1> [#uses=1]
+ %conv2 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %call = call i32 @func_16(i8 signext %tmp, i32 %conv2) ssp ; <i32> [#uses=1]
+ ret i32 1
+}
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll
new file mode 100644
index 0000000..1d63693
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -0,0 +1,158 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
+
+; CHECK-64: g64xh:
+; CHECK-64: testb $8, %ah
+; CHECK-64: ret
+; CHECK-32: g64xh:
+; CHECK-32: testb $8, %ah
+; CHECK-32: ret
+define void @g64xh(i64 inreg %x) nounwind {
+ %t = and i64 %x, 2048
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g64xl:
+; CHECK-64: testb $8, %dil
+; CHECK-64: ret
+; CHECK-32: g64xl:
+; CHECK-32: testb $8, %al
+; CHECK-32: ret
+define void @g64xl(i64 inreg %x) nounwind {
+ %t = and i64 %x, 8
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g32xh:
+; CHECK-64: testb $8, %ah
+; CHECK-64: ret
+; CHECK-32: g32xh:
+; CHECK-32: testb $8, %ah
+; CHECK-32: ret
+define void @g32xh(i32 inreg %x) nounwind {
+ %t = and i32 %x, 2048
+ %s = icmp eq i32 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g32xl:
+; CHECK-64: testb $8, %dil
+; CHECK-64: ret
+; CHECK-32: g32xl:
+; CHECK-32: testb $8, %al
+; CHECK-32: ret
+define void @g32xl(i32 inreg %x) nounwind {
+ %t = and i32 %x, 8
+ %s = icmp eq i32 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g16xh:
+; CHECK-64: testb $8, %ah
+; CHECK-64: ret
+; CHECK-32: g16xh:
+; CHECK-32: testb $8, %ah
+; CHECK-32: ret
+define void @g16xh(i16 inreg %x) nounwind {
+ %t = and i16 %x, 2048
+ %s = icmp eq i16 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g16xl:
+; CHECK-64: testb $8, %dil
+; CHECK-64: ret
+; CHECK-32: g16xl:
+; CHECK-32: testb $8, %al
+; CHECK-32: ret
+define void @g16xl(i16 inreg %x) nounwind {
+ %t = and i16 %x, 8
+ %s = icmp eq i16 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g64x16:
+; CHECK-64: testw $-32640, %di
+; CHECK-64: ret
+; CHECK-32: g64x16:
+; CHECK-32: testw $-32640, %ax
+; CHECK-32: ret
+define void @g64x16(i64 inreg %x) nounwind {
+ %t = and i64 %x, 32896
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g32x16:
+; CHECK-64: testw $-32640, %di
+; CHECK-64: ret
+; CHECK-32: g32x16:
+; CHECK-32: testw $-32640, %ax
+; CHECK-32: ret
+define void @g32x16(i32 inreg %x) nounwind {
+ %t = and i32 %x, 32896
+ %s = icmp eq i32 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+; CHECK-64: g64x32:
+; CHECK-64: testl $268468352, %edi
+; CHECK-64: ret
+; CHECK-32: g64x32:
+; CHECK-32: testl $268468352, %eax
+; CHECK-32: ret
+define void @g64x32(i64 inreg %x) nounwind {
+ %t = and i64 %x, 268468352
+ %s = icmp eq i64 %t, 0
+ br i1 %s, label %yes, label %no
+
+yes:
+ call void @bar()
+ ret void
+no:
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
index dbbef0a..3d5f672 100644
--- a/test/CodeGen/X86/testl-commute.ll
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {testl.*\(%r.i\), %} | count 3
+; RUN: llc < %s | grep {testl.*\(%r.i\), %} | count 3
; rdar://5671654
; The loads should fold into the testl instructions, no matter how
; the inputs are commuted.
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
new file mode 100644
index 0000000..4cad837
--- /dev/null
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+
+define i32 @f1() {
+entry:
+ %tmp1 = load i32* @i
+ ret i32 %tmp1
+}
+
+; X32: f1:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f1:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+@i2 = external thread_local global i32
+
+define i32* @f2() {
+entry:
+ ret i32* @i
+}
+
+; X32: f2:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f2:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+
+define i32 @f3() {
+entry:
+ %tmp1 = load i32* @i ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+; X32: f3:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f3:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+define i32* @f4() nounwind {
+entry:
+ ret i32* @i
+}
+
+; X32: f4:
+; X32: leal i@TLSGD(,%ebx), %eax
+; X32: call ___tls_get_addr@PLT
+
+; X64: f4:
+; X64: leaq i@TLSGD(%rip), %rdi
+; X64: call __tls_get_addr@PLT
+
+
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
index 85ff360..0cae5c4 100644
--- a/test/CodeGen/X86/tls1.ll
+++ b/test/CodeGen/X86/tls1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = thread_local global i32 15
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
index 2f5f02b..fb61596 100644
--- a/test/CodeGen/X86/tls10.ll
+++ b/test/CodeGen/X86/tls10.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
index b6aed9a..a2c1a1f 100644
--- a/test/CodeGen/X86/tls11.ll
+++ b/test/CodeGen/X86/tls11.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movw %gs:i@NTPOFF, %ax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movw %fs:i@TPOFF, %ax} %t2
@i = thread_local global i16 15
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
index b528839..c29f6ad 100644
--- a/test/CodeGen/X86/tls12.ll
+++ b/test/CodeGen/X86/tls12.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movb %gs:i@NTPOFF, %al} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movb %fs:i@TPOFF, %al} %t2
@i = thread_local global i8 15
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
index ec23a41..08778ec 100644
--- a/test/CodeGen/X86/tls13.ll
+++ b/test/CodeGen/X86/tls13.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movswl %gs:i@NTPOFF, %eax} %t
; RUN: grep {movzwl %gs:j@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movswl %fs:i@TPOFF, %edi} %t2
; RUN: grep {movzwl %fs:j@TPOFF, %edi} %t2
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
index 941601e..88426dd 100644
--- a/test/CodeGen/X86/tls14.ll
+++ b/test/CodeGen/X86/tls14.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movsbl %gs:i@NTPOFF, %eax} %t
; RUN: grep {movzbl %gs:j@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movsbl %fs:i@TPOFF, %edi} %t2
; RUN: grep {movzbl %fs:j@TPOFF, %edi} %t2
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
index 62f3677..7abf070 100644
--- a/test/CodeGen/X86/tls15.ll
+++ b/test/CodeGen/X86/tls15.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t | count 1
; RUN: grep {leal i@NTPOFF(%eax), %ecx} %t
; RUN: grep {leal j@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2 | count 1
; RUN: grep {leaq i@TPOFF(%rax), %rcx} %t2
; RUN: grep {leaq j@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
index baa51bb..5a94296 100644
--- a/test/CodeGen/X86/tls2.ll
+++ b/test/CodeGen/X86/tls2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
index 0618499..7327cc4 100644
--- a/test/CodeGen/X86/tls3.ll
+++ b/test/CodeGen/X86/tls3.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl i@INDNTPOFF, %eax} %t
; RUN: grep {movl %gs:(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq i@GOTTPOFF(%rip), %rax} %t2
; RUN: grep {movl %fs:(%rax), %eax} %t2
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
index 33f221b..d2e40e3 100644
--- a/test/CodeGen/X86/tls4.ll
+++ b/test/CodeGen/X86/tls4.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {addl i@INDNTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {addq i@GOTTPOFF(%rip), %rax} %t2
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
index ff7b9e0..4d2cc02 100644
--- a/test/CodeGen/X86/tls5.ll
+++ b/test/CodeGen/X86/tls5.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = internal thread_local global i32 15
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
index ab53929..505106e 100644
--- a/test/CodeGen/X86/tls6.ll
+++ b/test/CodeGen/X86/tls6.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
index 6a7739b..e9116e7 100644
--- a/test/CodeGen/X86/tls7.ll
+++ b/test/CodeGen/X86/tls7.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = hidden thread_local global i32 15
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
index fd9d472..375af94 100644
--- a/test/CodeGen/X86/tls8.ll
+++ b/test/CodeGen/X86/tls8.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:0, %eax} %t
; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movq %fs:0, %rax} %t2
; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index bc0a6f0..214146f 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
@i = external hidden thread_local global i32
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 9a013ff..03ae6bf 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep ud2
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2
define i32 @test() noreturn nounwind {
entry:
tail call void @llvm.trap( )
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 25a1191..374d404 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -1,7 +1,7 @@
; An integer truncation to i1 should be done with an and instruction to make
; sure only the LSBit survives. Test that this is the case both for a returned
; value and as the operand of a branch.
-; RUN: llvm-as < %s | llc -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \
+; RUN: llc < %s -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \
; RUN: count 5
define i1 @test1(i32 %X) zeroext {
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 3fe4cd1..6f16a25 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
; rdar://6480363
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 5293b77..d0e13f6 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 5
; rdar://6523745
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/twoaddr-delete.ll b/test/CodeGen/X86/twoaddr-delete.ll
index bbf4e62..77e3c75 100644
--- a/test/CodeGen/X86/twoaddr-delete.ll
+++ b/test/CodeGen/X86/twoaddr-delete.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {twoaddrinstr} | grep {Number of dead instructions deleted}
+; RUN: llc < %s -march=x86 -stats |& grep {twoaddrinstr} | grep {Number of dead instructions deleted}
%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 7655880..077fee0 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind {
entry:
diff --git a/test/CodeGen/X86/twoaddr-remat.ll b/test/CodeGen/X86/twoaddr-remat.ll
index b74b70c..4940c78 100644
--- a/test/CodeGen/X86/twoaddr-remat.ll
+++ b/test/CodeGen/X86/twoaddr-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 59796 | count 3
+; RUN: llc < %s -march=x86 | grep 59796 | count 3
%Args = type %Value*
%Exec = type opaque*
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index d630437..da5105d 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
; rdar://6504833
define float @f(i32 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll
index 148437f..41ee194 100644
--- a/test/CodeGen/X86/uint_to_fp.ll
+++ b/test/CodeGen/X86/uint_to_fp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep {sub.*esp}
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep cvtsi2ss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss
; rdar://6034396
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index 547e179..7416051 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+; RUN: llc < %s -march=x86 | grep {jc} | count 1
; XFAIL: *
; FIXME: umul-with-overflow not supported yet.
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index 9e69154..d522bd8 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "\\\\\\\<mul"
+; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
diff --git a/test/CodeGen/X86/urem-i8-constant.ll b/test/CodeGen/X86/urem-i8-constant.ll
index bc93684..e3cb69c 100644
--- a/test/CodeGen/X86/urem-i8-constant.ll
+++ b/test/CodeGen/X86/urem-i8-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 111
+; RUN: llc < %s -march=x86 | grep 111
define i8 @foo(i8 %tmp325) {
%t546 = urem i8 %tmp325, 37
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
index bd6045c..b5ebaa7 100644
--- a/test/CodeGen/X86/v4f32-immediate.ll
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps
define <4 x float> @foo() {
ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
index b0cdf49..4817db2 100644
--- a/test/CodeGen/X86/variable-sized-darwin-bzero.ll
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
declare void @llvm.memset.i64(i8*, i8, i64, i32)
diff --git a/test/CodeGen/X86/variadic-node-pic.ll b/test/CodeGen/X86/variadic-node-pic.ll
index 4d76445..1182a30 100644
--- a/test/CodeGen/X86/variadic-node-pic.ll
+++ b/test/CodeGen/X86/variadic-node-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -code-model=large
+; RUN: llc < %s -relocation-model=pic -code-model=large
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/vec_add.ll b/test/CodeGen/X86/vec_add.ll
index 72415a3..7c77d11 100644
--- a/test/CodeGen/X86/vec_add.ll
+++ b/test/CodeGen/X86/vec_add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
entry:
diff --git a/test/CodeGen/X86/vec_align.ll b/test/CodeGen/X86/vec_align.ll
index d88104d..e273115 100644
--- a/test/CodeGen/X86/vec_align.ll
+++ b/test/CodeGen/X86/vec_align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah -relocation-model=static | grep movaps | count 2
+; RUN: llc < %s -mcpu=yonah -relocation-model=static | grep movaps | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index ebdac7d..b3efc7b 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
; RUN: grep {subl.*60}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
; RUN: grep {movaps.*32}
diff --git a/test/CodeGen/X86/vec_clear.ll b/test/CodeGen/X86/vec_clear.ll
index 514de95..166d436 100644
--- a/test/CodeGen/X86/vec_clear.ll
+++ b/test/CodeGen/X86/vec_clear.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
; RUN: not grep and %t
; RUN: not grep psrldq %t
; RUN: grep xorps %t
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
new file mode 100644
index 0000000..c8c7257
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: pcmpgtd
+; CHECK: ret
+
+ %C = icmp sgt <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: pcmp
+; CHECK: pcmp
+; CHECK: xorps
+; CHECK: ret
+ %C = icmp sge <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: pcmpgtd
+; CHECK: movaps
+; CHECK: ret
+ %C = icmp slt <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: movaps
+; CHECK: pcmpgtd
+; CHECK: ret
+ %C = icmp ugt <4 x i32> %A, %B
+ %D = sext <4 x i1> %C to <4 x i32>
+ ret <4 x i32> %D
+}
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f057c9a..f0158d6 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index d6726be..dab5dd1 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; RUN: grep extractps %t | count 1
; RUN: grep pextrd %t | count 1
; RUN: not grep pshufd %t
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index ee7567c..b013730 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
; RUN: grep movss %t | count 3
; RUN: grep movhlps %t | count 1
; RUN: grep pshufd %t | count 1
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index a801472..d49c70e 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <4 x float> @t1(<4 x float> %Q) {
%tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
diff --git a/test/CodeGen/X86/vec_i64.ll b/test/CodeGen/X86/vec_i64.ll
index 3939af5..462e16e 100644
--- a/test/CodeGen/X86/vec_i64.ll
+++ b/test/CodeGen/X86/vec_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movq %t | count 2
; Used movq to load i64 into a v2i64 when the top i64 is 0.
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
index c7eb221..2951193 100644
--- a/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
; Inserts and extracts with variable indices must be lowered
; to memory accesses.
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index 7882839..bf43deb 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | \
+; RUN: opt < %s -scalarrepl -instcombine | \
; RUN: llc -march=x86 -mcpu=yonah | not grep sub.*esp
; This checks that various insert/extract idiom work without going to the
diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll
index 8d0bcc4..b08044b 100644
--- a/test/CodeGen/X86/vec_insert-2.ll
+++ b/test/CodeGen/X86/vec_insert-2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
%tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll
index e43eca4..a18cd86 100644
--- a/test/CodeGen/X86/vec_insert-3.ll
+++ b/test/CodeGen/X86/vec_insert-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
%tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 1a9768a..291fc04 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep psllq %t | grep 32
; RUN: grep pslldq %t | grep 12
; RUN: grep psrldq %t | grep 8
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index 5ef270f..54aa43f 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
define <4 x float> @t3(<4 x float>* %P) nounwind {
%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 8cfc63a..9ede10f 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_insert-8.ll b/test/CodeGen/X86/vec_insert-8.ll
index 0f6924c..650951c 100644
--- a/test/CodeGen/X86/vec_insert-8.ll
+++ b/test/CodeGen/X86/vec_insert-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
; tests variable insert and extract of a 4 x i32
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
index 3a9464c..a7274a9 100644
--- a/test/CodeGen/X86/vec_insert.ll
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
define void @test(<4 x float>* %F, i32 %I) {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_insert_4.ll b/test/CodeGen/X86/vec_insert_4.ll
index a0aa0c0..2c31e56 100644
--- a/test/CodeGen/X86/vec_insert_4.ll
+++ b/test/CodeGen/X86/vec_insert_4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep 1084227584 | count 1
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
index 6712276..8812c4f 100644
--- a/test/CodeGen/X86/vec_loadsingles.ll
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
index f895762..1dc0b16 100644
--- a/test/CodeGen/X86/vec_logical.ll
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep xorps %t | count 2
; RUN: grep andnps %t
; RUN: grep movaps %t | count 2
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 106966f..66762b4 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep xorps %t | count 1
; RUN: grep movaps %t | count 1
; RUN: not grep shuf %t
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
index ecb825b..033e9f7 100644
--- a/test/CodeGen/X86/vec_select.ll
+++ b/test/CodeGen/X86/vec_select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; RUN: llc < %s -march=x86 -mattr=+sse
define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-2.ll b/test/CodeGen/X86/vec_set-2.ll
index ae9530d..a8f1187 100644
--- a/test/CodeGen/X86/vec_set-2.ll
+++ b/test/CodeGen/X86/vec_set-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
define <4 x float> @test1(float %a) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index 546ca0b..ada17e0 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep pshufd %t | count 2
define <4 x float> @test(float %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set-4.ll b/test/CodeGen/X86/vec_set-4.ll
index da7ef80..332c8b7 100644
--- a/test/CodeGen/X86/vec_set-4.ll
+++ b/test/CodeGen/X86/vec_set-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pinsrw | count 2
define <2 x i64> @test(i16 %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-5.ll b/test/CodeGen/X86/vec_set-5.ll
index d332970..f811a74 100644
--- a/test/CodeGen/X86/vec_set-5.ll
+++ b/test/CodeGen/X86/vec_set-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movq %t | count 2
diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll
index c7b6747..0713d95 100644
--- a/test/CodeGen/X86/vec_set-6.ll
+++ b/test/CodeGen/X86/vec_set-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movss %t | count 1
; RUN: grep movq %t | count 1
; RUN: grep shufps %t | count 1
diff --git a/test/CodeGen/X86/vec_set-7.ll b/test/CodeGen/X86/vec_set-7.ll
index 6f98c51..d993178 100644
--- a/test/CodeGen/X86/vec_set-7.ll
+++ b/test/CodeGen/X86/vec_set-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
define <2 x i64> @test(<2 x i64>* %p) nounwind {
%tmp = bitcast <2 x i64>* %p to double*
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index cca436b..9697f11 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movsd
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
+; RUN: llc < %s -march=x86-64 | not grep movsd
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
define <2 x i64> @test(i64 %i) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 5c1b8f5..3656e5f 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
define <2 x i64> @test3(i64 %A) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-A.ll b/test/CodeGen/X86/vec_set-A.ll
index f33263f..f05eecf 100644
--- a/test/CodeGen/X86/vec_set-A.ll
+++ b/test/CodeGen/X86/vec_set-A.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
define <2 x i64> @test1() nounwind {
entry:
ret <2 x i64> < i64 1, i64 0 >
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
index d318964..f5b3e8b 100644
--- a/test/CodeGen/X86/vec_set-B.ll
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movaps
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep esp | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2
; These should both generate something like this:
;_test3:
diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll
index fc86853..7636ac3 100644
--- a/test/CodeGen/X86/vec_set-C.ll
+++ b/test/CodeGen/X86/vec_set-C.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd
define <2 x i64> @t1(i64 %x) nounwind {
%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-D.ll b/test/CodeGen/X86/vec_set-D.ll
index 71bdd84..3d6369e 100644
--- a/test/CodeGen/X86/vec_set-D.ll
+++ b/test/CodeGen/X86/vec_set-D.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
define <4 x i32> @t(i32 %x, i32 %y) nounwind {
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-E.ll b/test/CodeGen/X86/vec_set-E.ll
index ee63234..d78be66 100644
--- a/test/CodeGen/X86/vec_set-E.ll
+++ b/test/CodeGen/X86/vec_set-E.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
define <4 x float> @t(float %X) nounwind {
%tmp11 = insertelement <4 x float> undef, float %X, i32 0
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index db83eb2..4f0acb2 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_set-G.ll b/test/CodeGen/X86/vec_set-G.ll
index f81907c..4a542fe 100644
--- a/test/CodeGen/X86/vec_set-G.ll
+++ b/test/CodeGen/X86/vec_set-G.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
define fastcc void @t(<4 x float> %A) nounwind {
%tmp41896 = extractelement <4 x float> %A, i32 0 ; <float> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-H.ll b/test/CodeGen/X86/vec_set-H.ll
index ea7b853..5037e36 100644
--- a/test/CodeGen/X86/vec_set-H.ll
+++ b/test/CodeGen/X86/vec_set-H.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movz
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movz
define <2 x i64> @doload64(i16 signext %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
index e1c44d0..64f36f9 100644
--- a/test/CodeGen/X86/vec_set-I.ll
+++ b/test/CodeGen/X86/vec_set-I.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xorp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp
define void @t1() nounwind {
%tmp298.i.i = load <4 x float>* null, align 16
diff --git a/test/CodeGen/X86/vec_set-J.ll b/test/CodeGen/X86/vec_set-J.ll
index 488d360..d90ab85 100644
--- a/test/CodeGen/X86/vec_set-J.ll
+++ b/test/CodeGen/X86/vec_set-J.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
; PR2472
define <4 x i32> @a(<4 x i32> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 77636ed..c316df8 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep punpckl | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpckl | count 7
define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
%tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shift.ll b/test/CodeGen/X86/vec_shift.ll
index 9c595bc..ddf0469 100644
--- a/test/CodeGen/X86/vec_shift.ll
+++ b/test/CodeGen/X86/vec_shift.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllw
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psrlq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psrlq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shift2.ll b/test/CodeGen/X86/vec_shift2.ll
index b73f5f4..c5f9dc4 100644
--- a/test/CodeGen/X86/vec_shift2.ll
+++ b/test/CodeGen/X86/vec_shift2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep CPI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep CPI
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
diff --git a/test/CodeGen/X86/vec_shift3.ll b/test/CodeGen/X86/vec_shift3.ll
index 2641c5d..1ebf455 100644
--- a/test/CodeGen/X86/vec_shift3.ll
+++ b/test/CodeGen/X86/vec_shift3.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 2
define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
index 297469d..a63e386 100644
--- a/test/CodeGen/X86/vec_shuffle-10.ll
+++ b/test/CodeGen/X86/vec_shuffle-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep unpcklps %t | count 1
; RUN: grep pshufd %t | count 1
; RUN: not grep {sub.*esp} %t
diff --git a/test/CodeGen/X86/vec_shuffle-11.ll b/test/CodeGen/X86/vec_shuffle-11.ll
index 463858f..640745a 100644
--- a/test/CodeGen/X86/vec_shuffle-11.ll
+++ b/test/CodeGen/X86/vec_shuffle-11.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
define <4 x i32> @test() nounwind {
%tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 ) ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
index 6e8d0b8..f0cfc44 100644
--- a/test/CodeGen/X86/vec_shuffle-14.ll
+++ b/test/CodeGen/X86/vec_shuffle-14.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movq | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
define <4 x i32> @t1(i32 %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-15.ll b/test/CodeGen/X86/vec_shuffle-15.ll
index 062f77c..5a9b8fd 100644
--- a/test/CodeGen/X86/vec_shuffle-15.ll
+++ b/test/CodeGen/X86/vec_shuffle-15.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind {
%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index b3a5b76..470f676 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t
; RUN: grep shufps %t | count 4
; RUN: grep movaps %t | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
; RUN: grep pshufd %t | count 4
; RUN: not grep shufps %t
; RUN: not grep mov %t
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index 992d791..9c33abb 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi, %xmm0}
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep xor
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi, %xmm0}
+; RUN: llc < %s -march=x86-64 | not grep xor
; PR2108
define <2 x i64> @doload64(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-18.ll b/test/CodeGen/X86/vec_shuffle-18.ll
index 8539263..1104a4a 100644
--- a/test/CodeGen/X86/vec_shuffle-18.ll
+++ b/test/CodeGen/X86/vec_shuffle-18.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
%struct.vector4_t = type { <4 x float> }
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 4e7db20..9fc09df 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
; PR2485
define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 7189084..6d1bac0 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
index d19f110..5307ced 100644
--- a/test/CodeGen/X86/vec_shuffle-22.ll
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium-m -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=pentium-m -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep pshufd %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movddup %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 7e8aa5d..05a3a1e 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
define i32 @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 170ba35..7562f1d 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck
define i32 @t() nounwind optsize {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
index 18922aa..2aa2d25 100644
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep unpcklps %t | count 3
; RUN: grep unpckhps %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index abd6e90..8cc15d1 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep unpcklps %t | count 1
; RUN: grep unpckhps %t | count 3
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 231ac0c..d700ccb 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep addps %t | count 2
; RUN: grep mulps %t | count 2
; RUN: grep subps %t | count 2
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
index f7e5001..343685b 100644
--- a/test/CodeGen/X86/vec_shuffle-28.ll
+++ b/test/CodeGen/X86/vec_shuffle-28.ll
@@ -1,8 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep movd %t | count 1
-; RUN: grep pshuflw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep pshufb %t | count 1
; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 018b4cf..556f103 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movhlps %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
index 50a3df8..3f69150 100644
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f
-; RUN: grep pshufhw %t | grep 161 | count 1
+; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: grep pshufhw %t | grep -- -95 | count 1
; RUN: grep shufps %t | count 1
; RUN: not grep pslldq %t
; Test case when creating pshufhw, we incorrectly set the higher order bit
; for an undef,
-define void @test(<8 x i16>* %dest, <8 x i16> %in) {
+define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind {
entry:
%0 = load <8 x i16>* %dest
%1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14>
@@ -14,7 +14,7 @@ entry:
}
; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq
-define void @test2(<4 x i32>* %dest, <4 x i32> %in) {
+define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind {
entry:
%0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
store <4 x i32> %0, <4 x i32>* %dest
diff --git a/test/CodeGen/X86/vec_shuffle-31.ll b/test/CodeGen/X86/vec_shuffle-31.ll
index efcd030..bb06e15 100644
--- a/test/CodeGen/X86/vec_shuffle-31.ll
+++ b/test/CodeGen/X86/vec_shuffle-31.ll
@@ -1,9 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep pextrw %t | count 1
-; RUN: grep movlhps %t | count 1
-; RUN: grep pshufhw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep pshufb %t | count 1
define <8 x i16> @shuf3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
diff --git a/test/CodeGen/X86/vec_shuffle-34.ll b/test/CodeGen/X86/vec_shuffle-34.ll
index 99c95d1..d057b3f 100644
--- a/test/CodeGen/X86/vec_shuffle-34.ll
+++ b/test/CodeGen/X86/vec_shuffle-34.ll
@@ -1,10 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep pextrw %t | count 1
-; RUN: grep punpcklqdq %t | count 1
-; RUN: grep pshuflw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
-; RUN: grep pshufb %t | count 2
+; RUN: llc < %s -march=x86 -mcpu=core2 | grep pshufb | count 2
define <8 x i16> @shuf2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
index 7be8468..7f0fcb5 100644
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stack-alignment=16 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
; RUN: grep pextrw %t | count 13
; RUN: grep pinsrw %t | count 14
; RUN: grep rolw %t | count 13
; RUN: not grep esp %t
; RUN: not grep ebp %t
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -stack-alignment=16 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -stack-alignment=16 -o %t
; RUN: grep pshufb %t | count 3
define <16 x i8> @shuf1(<16 x i8> %T0) nounwind readnone {
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 0051187..8a93a7e 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep pshufb %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
index 3c03baa..829fedf 100644
--- a/test/CodeGen/X86/vec_shuffle-4.ll
+++ b/test/CodeGen/X86/vec_shuffle-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
; RUN: grep shuf %t | count 2
; RUN: not grep unpck %t
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
index e356f24..c24167a 100644
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movhlps %t | count 1
; RUN: grep shufps %t | count 1
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
index f7c9f2d..f034b0a 100644
--- a/test/CodeGen/X86/vec_shuffle-6.ll
+++ b/test/CodeGen/X86/vec_shuffle-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep movapd %t | count 1
; RUN: grep movaps %t | count 1
; RUN: grep movups %t | count 2
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
index fbcfac5..4cdca09 100644
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ b/test/CodeGen/X86/vec_shuffle-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep xorps %t | count 1
; RUN: not grep shufps %t
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
index 73d75e6..964ce7b 100644
--- a/test/CodeGen/X86/vec_shuffle-8.ll
+++ b/test/CodeGen/X86/vec_shuffle-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 | \
; RUN: not grep shufps
define void @test(<4 x float>* %res, <4 x float>* %A) {
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
index 68577d4..2bef24d 100644
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ b/test/CodeGen/X86/vec_shuffle-9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
; RUN: grep punpck %t | count 2
; RUN: not grep pextrw %t
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index f43aa1d..c05b79a 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep shufp %t | count 1
; RUN: grep movupd %t | count 1
; RUN: grep pshufhw %t | count 1
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index c6e3ddd..cde5ae9 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1
define void @test(<2 x i64>* %P, i8 %x) nounwind {
%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index 1f1a214..649b85c 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep punpcklwd %t | count 4
; RUN: grep punpckhwd %t | count 4
; RUN: grep "pshufd" %t | count 8
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
index 220e1cd..d9941e6 100644
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ b/test/CodeGen/X86/vec_splat-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep punpcklbw %t | count 16
; RUN: grep punpckhbw %t | count 16
; RUN: grep "pshufd" %t | count 16
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 89914fd..a87fbd0 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup
define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 69900a6..b1613fb 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t
; RUN: grep minss %t | grep CPI | count 2
; RUN: grep CPI %t | not grep movss
diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll
index efdf564..e42b538 100644
--- a/test/CodeGen/X86/vec_zero-2.ll
+++ b/test/CodeGen/X86/vec_zero-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
define i32 @t() {
entry:
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 0a7a543..ae5af58 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xorps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 0ccf745..296378c 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
@M1 = external global <1 x i64>
@M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/vector-intrinsics.ll b/test/CodeGen/X86/vector-intrinsics.ll
index 3291658..edf58b9 100644
--- a/test/CodeGen/X86/vector-intrinsics.ll
+++ b/test/CodeGen/X86/vector-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep call | count 16
+; RUN: llc < %s -march=x86-64 | grep call | count 16
declare <4 x double> @llvm.sin.v4f64(<4 x double> %p)
declare <4 x double> @llvm.cos.v4f64(<4 x double> %p)
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
index cfdd34e..51cd872 100644
--- a/test/CodeGen/X86/vector-rem.ll
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fmodf | count 4
+; RUN: llc < %s -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 4
define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) {
%m = srem <4 x i32> %t, %u
diff --git a/test/CodeGen/X86/vector-variable-idx.ll b/test/CodeGen/X86/vector-variable-idx.ll
index 82927e9..2a4d18c 100644
--- a/test/CodeGen/X86/vector-variable-idx.ll
+++ b/test/CodeGen/X86/vector-variable-idx.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movss | count 2
+; RUN: llc < %s -march=x86-64 | grep movss | count 2
; PR2676
define float @foo(<4 x float> %p, i32 %t) {
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 8e1de2f..3fff849 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 > %t
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah > %t
+; RUN: llc < %s -march=x86 -mcpu=i386 > %t
+; RUN: llc < %s -march=x86 -mcpu=yonah > %t
%d8 = type <8 x double>
%f1 = type <1 x float>
diff --git a/test/CodeGen/X86/vfcmp.ll b/test/CodeGen/X86/vfcmp.ll
index 85b82a0..f5f5293 100644
--- a/test/CodeGen/X86/vfcmp.ll
+++ b/test/CodeGen/X86/vfcmp.ll
@@ -1,8 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2620
-define void @t(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
- vfcmp olt <2 x double> zeroinitializer, zeroinitializer ; <<2 x i64>>:1 [#uses=1]
+
+define void @t2(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
+ %A = fcmp olt <2 x double> zeroinitializer, zeroinitializer ; <<2 x i64>>:1 [#uses=1]
+ sext <2 x i1> %A to <2 x i64>
extractelement <2 x i64> %1, i32 1 ; <i64>:2 [#uses=1]
lshr i64 %2, 63 ; <i64>:3 [#uses=1]
trunc i64 %3 to i1 ; <i1>:4 [#uses=1]
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index f919b5d..5e1e0c8 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 | grep movsd | count 5
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
@x = external global double
diff --git a/test/CodeGen/X86/vortex-bug.ll b/test/CodeGen/X86/vortex-bug.ll
index d62bb24..40f1117 100644
--- a/test/CodeGen/X86/vortex-bug.ll
+++ b/test/CodeGen/X86/vortex-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
%struct.blktkntype = type { i32, i32 }
%struct.fieldstruc = type { [128 x i8], %struct.blktkntype*, i32, i32 }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index d7a20e4..ae845e0 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -1,13 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psllq %t | count 2
-; RUN: grep pslld %t | count 2
-; RUN: grep psllw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: psllq
%shl = shl <2 x i64> %val, < i64 32, i64 32 >
store <2 x i64> %shl, <2 x i64>* %dst
ret void
@@ -15,6 +14,9 @@ entry:
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psllq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%shl = shl <2 x i64> %val, %1
@@ -25,6 +27,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: pslld
%shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
store <4 x i32> %shl, <4 x i32>* %dst
ret void
@@ -32,6 +36,9 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: pslld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -43,13 +50,20 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: psllw
%shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
store <8 x i16> %shl, <8 x i16>* %dst
ret void
}
+; Make sure the shift amount is properly zero extended.
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psllw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 0807174..36feb11 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -1,13 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psrlq %t | count 2
-; RUN: grep psrld %t | count 2
-; RUN: grep psrlw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: psrlq
%lshr = lshr <2 x i64> %val, < i64 32, i64 32 >
store <2 x i64> %lshr, <2 x i64>* %dst
ret void
@@ -15,6 +14,9 @@ entry:
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psrlq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%lshr = lshr <2 x i64> %val, %1
@@ -24,6 +26,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: psrld
%lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 >
store <4 x i32> %lshr, <4 x i32>* %dst
ret void
@@ -31,6 +35,9 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -43,13 +50,20 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: psrlw
%lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
store <8 x i16> %lshr, <8 x i16>* %dst
ret void
}
+; properly zero extend the shift amount
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psrlw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
@@ -61,4 +75,4 @@ entry:
%lshr = lshr <8 x i16> %val, %7
store <8 x i16> %lshr, <8 x i16>* %dst
ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index eea8ad1..20d3f48 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -1,13 +1,15 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psrad %t | count 2
-; RUN: grep psraw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
; Note that x86 does have ashr
+
+; shift1a can't use a packed shift
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: sarl
%ashr = ashr <2 x i64> %val, < i64 32, i64 32 >
store <2 x i64> %ashr, <2 x i64>* %dst
ret void
@@ -15,6 +17,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: psrad $5
%ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
store <4 x i32> %ashr, <4 x i32>* %dst
ret void
@@ -22,6 +26,9 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrad
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -33,6 +40,8 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: psraw $5
%ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
store <8 x i16> %ashr, <8 x i16>* %dst
ret void
@@ -40,6 +49,10 @@ entry:
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psraw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
@@ -51,4 +64,4 @@ entry:
%ashr = ashr <8 x i16> %val, %7
store <8 x i16> %ashr, <8 x i16>* %dst
ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 03ab95c..9773cbe 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,21 +1,23 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psllq %t | count 1
-; RUN: grep pslld %t | count 3
-; RUN: grep psllw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
entry:
+; CHECK: shift1a:
+; CHECK: psllq
%shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%shl = shl <2 x i64> %val, %shamt
store <2 x i64> %shl, <2 x i64>* %dst
ret void
}
+; shift1b can't use a packed shift
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
entry:
+; CHECK: shift1b:
+; CHECK: shll
%shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
%shl = shl <2 x i64> %val, %shamt
store <2 x i64> %shl, <2 x i64>* %dst
@@ -24,6 +26,8 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
entry:
+; CHECK: shift2a:
+; CHECK: pslld
%shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%shl = shl <4 x i32> %val, %shamt
store <4 x i32> %shl, <4 x i32>* %dst
@@ -32,6 +36,8 @@ entry:
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
entry:
+; CHECK: shift2b:
+; CHECK: pslld
%shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 1, i32 1>
%shl = shl <4 x i32> %val, %shamt
store <4 x i32> %shl, <4 x i32>* %dst
@@ -40,6 +46,8 @@ entry:
define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
entry:
+; CHECK: shift2c:
+; CHECK: pslld
%shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%shl = shl <4 x i32> %val, %shamt
store <4 x i32> %shl, <4 x i32>* %dst
@@ -48,6 +56,9 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
entry:
+; CHECK: shift3a:
+; CHECK: movzwl
+; CHECK: psllw
%shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
%shl = shl <8 x i16> %val, %shamt
store <8 x i16> %shl, <8 x i16>* %dst
@@ -56,6 +67,9 @@ entry:
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: psllw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
new file mode 100644
index 0000000..a543f38
--- /dev/null
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; When loading the shift amount from memory, avoid generating the splat.
+
+define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5a:
+; CHECK: movd
+; CHECK-NEXT: pslld
+ %amt = load i32* %pamt
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shl = shl <4 x i32> %val, %shamt
+ store <4 x i32> %shl, <4 x i32>* %dst
+ ret void
+}
+
+
+define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5b:
+; CHECK: movd
+; CHECK-NEXT: psrad
+ %amt = load i32* %pamt
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shr = ashr <4 x i32> %val, %shamt
+ store <4 x i32> %shr, <4 x i32>* %dst
+ ret void
+}
+
+
+define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5c:
+; CHECK: movd
+; CHECK-NEXT: pslld
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shl = shl <4 x i32> %val, %shamt
+ store <4 x i32> %shl, <4 x i32>* %dst
+ ret void
+}
+
+
+define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5d:
+; CHECK: movd
+; CHECK-NEXT: psrad
+ %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+ %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shr = ashr <4 x i32> %val, %shamt
+ store <4 x i32> %shr, <4 x i32>* %dst
+ ret void
+}
diff --git a/test/CodeGen/X86/vshift_scalar.ll b/test/CodeGen/X86/vshift_scalar.ll
index 8895cdf..9dd8478 100644
--- a/test/CodeGen/X86/vshift_scalar.ll
+++ b/test/CodeGen/X86/vshift_scalar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; Legalization test that requires scalarizing a vector.
diff --git a/test/CodeGen/X86/vshift_split.ll b/test/CodeGen/X86/vshift_split.ll
index 8f485dd..359d36d 100644
--- a/test/CodeGen/X86/vshift_split.ll
+++ b/test/CodeGen/X86/vshift_split.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=x86 -mattr=+sse2
; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
entry:
- %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
+ %shr = lshr <2 x i64> %val, < i64 2, i64 3 >
ret <2 x i64> %shr
}
diff --git a/test/CodeGen/X86/vshift_split2.ll b/test/CodeGen/X86/vshift_split2.ll
index e943849..0f8c2b8 100644
--- a/test/CodeGen/X86/vshift_split2.ll
+++ b/test/CodeGen/X86/vshift_split2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
; Legalization example that requires splitting a large vector into smaller pieces.
diff --git a/test/CodeGen/X86/weak.ll b/test/CodeGen/X86/weak.ll
index 28638af..8590e8d 100644
--- a/test/CodeGen/X86/weak.ll
+++ b/test/CodeGen/X86/weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
@a = extern_weak global i32 ; <i32*> [#uses=1]
@b = global i32* @a ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/wide-integer-fold.ll b/test/CodeGen/X86/wide-integer-fold.ll
new file mode 100644
index 0000000..b3b4d24
--- /dev/null
+++ b/test/CodeGen/X86/wide-integer-fold.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK: movq $-65535, %rax
+
+; DAGCombiner should fold this to a simple constant.
+
+define i64 @foo(i192 %a) nounwind {
+ %t = or i192 %a, -22300404916163702203072254898040925442801665
+ %s = and i192 %t, -22300404916163702203072254898040929737768960
+ %u = lshr i192 %s, 128
+ %v = trunc i192 %u to i64
+ ret i64 %v
+}
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 4190781..8f607f5 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddb %t | count 1
; RUN: grep pextrb %t | count 1
; RUN: not grep pextrw %t
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index de6cd087..e2420f0 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddb %t | count 1
; RUN: grep pand %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index fbba445..a22d254 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddw %t | count 1
; RUN: grep movd %t | count 2
; RUN: grep pextrw %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index e19ab65..898bff0 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep psubw %t | count 1
; RUN: grep pmullw %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index 6ff099d..1ecf09d 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep pmulld %t | count 1
; RUN: grep psubd %t | count 1
; RUN: grep movaps %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 7b0bb33..3583258 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep mulps %t | count 1
; RUN: grep addps %t | count 1
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index ed8d27c..441a360 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddw %t | count 1
; RUN: grep movd %t | count 1
; RUN: grep pextrd %t | count 1
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 3b45ce3..ded5707 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep pextrd %t | count 5
; RUN: grep movd %t | count 3
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 33cc41f..67a760f 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep paddd %t | count 1
; RUN: grep pextrd %t | count 2
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index b090cb1..614eeed 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep sarb %t | count 8
; v8i8 that is widen to v16i8 then split
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index 7696942..92618d6 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; bitcast a i64 to v2i32
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 0fa1b7a..386f749 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx -o %t
; RUN: grep movd %t | count 1
; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index a4aab7b..ccc8b4f 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; RUN: grep pshufd %t | count 1
; RUN: grep paddd %t | count 1
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index 191a261..9b7ab74 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; sign extension v2i32 to v2i16
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index 154788d..4ec76a9 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; grep cvtsi2ss %t | count 1
; sign to float v2i16 to v2f32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 1ea5788..61a26a8 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
new file mode 100644
index 0000000..f6c4af0
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR4891
+
+; Both loads should happen before either store.
+
+; CHECK: movl (%rdi), %eax
+; CHECK: movl (%rsi), %ecx
+; CHECK: movl %ecx, (%rdi)
+; CHECK: movl %eax, (%rsi)
+
+define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
+entry:
+ %0 = load <2 x i16>* %b, align 2 ; <<2 x i16>> [#uses=1]
+ %1 = load i32* %c, align 4 ; <i32> [#uses=1]
+ %tmp1 = bitcast i32 %1 to <2 x i16> ; <<2 x i16>> [#uses=1]
+ store <2 x i16> %tmp1, <2 x i16>* %b, align 2
+ %tmp5 = bitcast <2 x i16> %0 to <1 x i32> ; <<1 x i32>> [#uses=1]
+ %tmp3 = extractelement <1 x i32> %tmp5, i32 0 ; <i32> [#uses=1]
+ store i32 %tmp3, i32* %c, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
new file mode 100644
index 0000000..2d34b31
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -0,0 +1,45 @@
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; PR4891
+
+; This load should be before the call, not after.
+
+; CHECK: movq compl+128(%rip), %xmm0
+; CHECK: movaps %xmm0, (%rsp)
+; CHECK: call killcommon
+
+@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]
+
+declare void @killcommon(i32* noalias)
+
+define void @reset(<2 x float>* noalias %garbage1) {
+"file complex.c, line 27, bb1":
+ %changed = alloca i32, align 4 ; <i32*> [#uses=3]
+ br label %"file complex.c, line 27, bb13"
+
+"file complex.c, line 27, bb13": ; preds = %"file complex.c, line 27, bb1"
+ store i32 0, i32* %changed, align 4
+ %r2 = getelementptr float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
+ %r3 = bitcast float* %r2 to <2 x float>* ; <<2 x float>*> [#uses=1]
+ %r4 = load <2 x float>* %r3, align 4 ; <<2 x float>> [#uses=1]
+ call void @killcommon(i32* %changed)
+ br label %"file complex.c, line 34, bb4"
+
+"file complex.c, line 34, bb4": ; preds = %"file complex.c, line 27, bb13"
+ %r5 = load i32* %changed, align 4 ; <i32> [#uses=1]
+ %r6 = icmp eq i32 %r5, 0 ; <i1> [#uses=1]
+ %r7 = zext i1 %r6 to i32 ; <i32> [#uses=1]
+ %r8 = icmp ne i32 %r7, 0 ; <i1> [#uses=1]
+ br i1 %r8, label %"file complex.c, line 34, bb7", label %"file complex.c, line 27, bb5"
+
+"file complex.c, line 27, bb5": ; preds = %"file complex.c, line 34, bb4"
+ br label %"file complex.c, line 35, bb6"
+
+"file complex.c, line 35, bb6": ; preds = %"file complex.c, line 27, bb5"
+ %r11 = ptrtoint <2 x float>* %garbage1 to i64 ; <i64> [#uses=1]
+ %r12 = inttoptr i64 %r11 to <2 x float>* ; <<2 x float>*> [#uses=1]
+ store <2 x float> %r4, <2 x float>* %r12, align 4
+ br label %"file complex.c, line 34, bb7"
+
+"file complex.c, line 34, bb7": ; preds = %"file complex.c, line 35, bb6", %"file complex.c, line 34, bb4"
+ ret void
+}
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index 3d757b8..aca0b67 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; widening select v6i32 and then a sub
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index a676f33..15da870 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; widening shuffle v3float and then a add
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index c2dfa3d..617cc1de 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
; widening shuffle v3float and then a add
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 3d61e5d..3c73891 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl.*%edi, %eax}
+; RUN: llc < %s | grep {movl.*%edi, %eax}
; This should be a single mov, not a load of immediate + andq.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll
index 22a095b..ec8dd8e 100644
--- a/test/CodeGen/X86/x86-64-arg.ll
+++ b/test/CodeGen/X86/x86-64-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl %edi, %eax}
+; RUN: llc < %s | grep {movl %edi, %eax}
; The input value is already sign extended, don't re-extend it.
; This testcase corresponds to:
; int test(short X) { return (int)X; }
diff --git a/test/CodeGen/X86/x86-64-asm.ll b/test/CodeGen/X86/x86-64-asm.ll
index 8ccf8b6..2640e593 100644
--- a/test/CodeGen/X86/x86-64-asm.ll
+++ b/test/CodeGen/X86/x86-64-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
; PR1029
target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
index 15a30de..79316f2 100644
--- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | not grep rsp
-; RUN: llvm-as < %s | llc | grep cvttsd2siq
+; RUN: llc < %s | not grep rsp
+; RUN: llc < %s | grep cvttsd2siq
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-disp.ll b/test/CodeGen/X86/x86-64-disp.ll
index 4a8f6cd..d8059eb 100644
--- a/test/CodeGen/X86/x86-64-disp.ll
+++ b/test/CodeGen/X86/x86-64-disp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
; Fold an offset into an address even if it's not a 32-bit
; signed integer.
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
index 8006099..57163d3 100644
--- a/test/CodeGen/X86/x86-64-frameaddr.ll
+++ b/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | grep rbp
+; RUN: llc < %s -march=x86-64 | grep movq | grep rbp
define i64* @stack_end_address() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-64-gv-offset.ll b/test/CodeGen/X86/x86-64-gv-offset.ll
index b89e1b9..365e4af 100644
--- a/test/CodeGen/X86/x86-64-gv-offset.ll
+++ b/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep lea
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep lea
%struct.x = type { float, double }
@X = global %struct.x { float 1.000000e+00, double 2.000000e+00 }, align 16 ; <%struct.x*> [#uses=2]
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
index 4beb5c2..b4f1fa6 100644
--- a/test/CodeGen/X86/x86-64-malloc.ll
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {shll.*3, %edi}
+; RUN: llc < %s -march=x86-64 | grep {shll.*3, %edi}
; PR3829
; The generated code should multiply by 3 (sizeof i8*) as an i32,
; not as an i64!
diff --git a/test/CodeGen/X86/x86-64-mem.ll b/test/CodeGen/X86/x86-64-mem.ll
index 7497362..d15f516 100644
--- a/test/CodeGen/X86/x86-64-mem.ll
+++ b/test/CodeGen/X86/x86-64-mem.ll
@@ -1,10 +1,9 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -o %t1
; RUN: grep GOTPCREL %t1 | count 4
; RUN: grep %%rip %t1 | count 6
; RUN: grep movq %t1 | count 6
; RUN: grep leaq %t1 | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=static -o %t2 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static -o %t2
; RUN: grep movl %t2 | count 2
; RUN: grep movq %t2 | count 2
diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll
index f5303c6..b21918e 100644
--- a/test/CodeGen/X86/x86-64-pic-1.ll
+++ b/test/CodeGen/X86/x86-64-pic-1.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call f@PLT} %t1
define void @g() {
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index bc0d0c0..0f65e57 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call g@PLT} %t1
@g = alias weak i32 ()* @f
diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll
index f7e0def..ef81685 100644
--- a/test/CodeGen/X86/x86-64-pic-11.ll
+++ b/test/CodeGen/X86/x86-64-pic-11.ll
@@ -1,8 +1,7 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call __fixunsxfti@PLT} %t1
-define i128 @f(x86_fp80 %a) {
+define i128 @f(x86_fp80 %a) nounwind {
entry:
%tmp78 = fptoui x86_fp80 %a to i128
ret i128 %tmp78
diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll
index 39aecba..a52c564 100644
--- a/test/CodeGen/X86/x86-64-pic-2.ll
+++ b/test/CodeGen/X86/x86-64-pic-2.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call f} %t1
; RUN: not grep {call f@PLT} %t1
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index 0f5f4b7..246c00f 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {call f} %t1
; RUN: not grep {call f@PLT} %t1
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
index f8dfa92..90fc119 100644
--- a/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movq a@GOTPCREL(%rip),} %t1
@a = global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
index 694755d..6369bde 100644
--- a/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movl a(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index 965a550..6e19ad3 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movl a(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
index 95b7197f..4d98ee6 100644
--- a/test/CodeGen/X86/x86-64-pic-7.ll
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {movq f@GOTPCREL(%rip),} %t1
define void ()* @g() nounwind {
diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll
index 369e0cf..d3b567c 100644
--- a/test/CodeGen/X86/x86-64-pic-8.ll
+++ b/test/CodeGen/X86/x86-64-pic-8.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {leaq f(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
index 175ec4e..0761031 100644
--- a/test/CodeGen/X86/x86-64-pic-9.ll
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep {leaq f(%rip),} %t1
; RUN: not grep GOTPCREL %t1
diff --git a/test/CodeGen/X86/x86-64-ret0.ll b/test/CodeGen/X86/x86-64-ret0.ll
index d4252e7..c74f6d8 100644
--- a/test/CodeGen/X86/x86-64-ret0.ll
+++ b/test/CodeGen/X86/x86-64-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
define i32 @f() nounwind {
tail call void @t( i32 1 ) nounwind
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
index 369527f..7f96543 100644
--- a/test/CodeGen/X86/x86-64-shortint.ll
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep movswl
+; RUN: llc < %s | grep movswl
target datalayout = "e-p:64:64"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index 9298661..7b5f189 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -1,9 +1,11 @@
-; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax}
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
%struct.foo = type { [4 x i64] }
+; CHECK: bar:
+; CHECK: movq %rdi, %rax
define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind {
entry:
%d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2]
@@ -52,3 +54,10 @@ entry:
return: ; preds = %entry
ret void
}
+
+; CHECK: foo:
+; CHECK: movq %rdi, %rax
+define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
+ store { i64 } { i64 0 }, { i64 }* %agg.result
+ ret void
+}
diff --git a/test/CodeGen/X86/x86-64-varargs.ll b/test/CodeGen/X86/x86-64-varargs.ll
index 2964dd3..428f449 100644
--- a/test/CodeGen/X86/x86-64-varargs.ll
+++ b/test/CodeGen/X86/x86-64-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
@.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00" ; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/x86-frameaddr.ll b/test/CodeGen/X86/x86-frameaddr.ll
index b9d6d13..d595874 100644
--- a/test/CodeGen/X86/x86-frameaddr.ll
+++ b/test/CodeGen/X86/x86-frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | grep ebp
+; RUN: llc < %s -march=x86 | grep mov | grep ebp
define i8* @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-frameaddr2.ll b/test/CodeGen/X86/x86-frameaddr2.ll
index f50ab07..c509115 100644
--- a/test/CodeGen/X86/x86-frameaddr2.ll
+++ b/test/CodeGen/X86/x86-frameaddr2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | grep mov | count 3
define i8* @t() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-store-gv-addr.ll b/test/CodeGen/X86/x86-store-gv-addr.ll
index 799340d..089517a 100644
--- a/test/CodeGen/X86/x86-store-gv-addr.ll
+++ b/test/CodeGen/X86/x86-store-gv-addr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
@v = external global i32, align 8
@v_addr = external global i32*, align 8
diff --git a/test/CodeGen/X86/xmm-r64.ll b/test/CodeGen/X86/xmm-r64.ll
index f7d2143..2a6b5c7 100644
--- a/test/CodeGen/X86/xmm-r64.ll
+++ b/test/CodeGen/X86/xmm-r64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
define <4 x i32> @test() {
%tmp1039 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
new file mode 100644
index 0000000..7bd06bb
--- /dev/null
+++ b/test/CodeGen/X86/xor.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define <4 x i32> @test1() nounwind {
+ %tmp = xor <4 x i32> undef, undef
+ ret <4 x i32> %tmp
+
+; X32: test1:
+; X32: xorps %xmm0, %xmm0
+; X32: ret
+}
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define i32 @test2() nounwind{
+ %tmp = xor i32 undef, undef
+ ret i32 %tmp
+; X32: test2:
+; X32: xorl %eax, %eax
+; X32: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind {
+entry:
+ %tmp1not = xor i32 %b, -2
+ %tmp3 = and i32 %tmp1not, %a
+ %tmp4 = lshr i32 %tmp3, 1
+ ret i32 %tmp4
+
+; X64: test3:
+; X64: notl %esi
+; X64: andl %edi, %esi
+; X64: movl %esi, %eax
+; X64: shrl %eax
+; X64: ret
+
+; X32: test3:
+; X32: movl 8(%esp), %eax
+; X32: notl %eax
+; X32: andl 4(%esp), %eax
+; X32: shrl %eax
+; X32: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483647
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+
+; X64: test4:
+; X64: notl [[REG:%[a-z]+]]
+; X64: andl {{.*}}[[REG]]
+; X32: test4:
+; X32: notl [[REG:%[a-z]+]]
+; X32: andl {{.*}}[[REG]]
+}
+
+define i16 @test5(i16 %a, i16 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i16 %a_addr.0, %b_addr.0
+ %tmp4not = xor i16 %tmp3, 32767
+ %tmp6 = and i16 %tmp4not, %b_addr.0
+ %tmp8 = shl i16 %tmp6, 1
+ %tmp10 = icmp eq i16 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i16 %tmp3
+; X64: test5:
+; X64: notw [[REG:%[a-z]+]]
+; X64: andw {{.*}}[[REG]]
+; X32: test5:
+; X32: notw [[REG:%[a-z]+]]
+; X32: andw {{.*}}[[REG]]
+}
+
+define i8 @test6(i8 %a, i8 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i8 %a_addr.0, %b_addr.0
+ %tmp4not = xor i8 %tmp3, 127
+ %tmp6 = and i8 %tmp4not, %b_addr.0
+ %tmp8 = shl i8 %tmp6, 1
+ %tmp10 = icmp eq i8 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i8 %tmp3
+; X64: test6:
+; X64: notb [[REG:%[a-z]+]]
+; X64: andb {{.*}}[[REG]]
+; X32: test6:
+; X32: notb [[REG:%[a-z]+]]
+; X32: andb {{.*}}[[REG]]
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483646
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+; X64: test7:
+; X64: xorl $2147483646, [[REG:%[a-z]+]]
+; X64: andl {{.*}}[[REG]]
+; X32: test7:
+; X32: xorl $2147483646, [[REG:%[a-z]+]]
+; X32: andl {{.*}}[[REG]]
+}
+
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 7640ba5..3e3bb95 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -1,16 +1,40 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xor | count 4
-; RUN: llvm-as < %s | llc -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12
-; RUN: llvm-as < %s | llc -march=x86 | grep fldz
-; RUN: llvm-as < %s | llc -march=x86 | not grep fldl
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
declare void @bar(double %x)
declare void @barf(float %x)
define double @foo() nounwind {
+
call void @bar(double 0.0)
ret double 0.0
+
+;CHECK-32: foo:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foo:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
}
+
+
define float @foof() nounwind {
call void @barf(float 0.0)
ret float 0.0
+
+;CHECK-32: foof:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foof:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
}
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
index 1a73464..ae6221a 100644
--- a/test/CodeGen/X86/zext-inreg-0.ll
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movzbq %t
; RUN: not grep movzwq %t
diff --git a/test/CodeGen/X86/zext-inreg-1.ll b/test/CodeGen/X86/zext-inreg-1.ll
index bc8e482..17fe374 100644
--- a/test/CodeGen/X86/zext-inreg-1.ll
+++ b/test/CodeGen/X86/zext-inreg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
; These tests differ from the ones in zext-inreg-0.ll in that
; on x86-64 they do require and instructions.
diff --git a/test/CodeGen/XCore/2008-11-17-Shl64.ll b/test/CodeGen/XCore/2008-11-17-Shl64.ll
index 97ea41b..04b1b5a 100644
--- a/test/CodeGen/XCore/2008-11-17-Shl64.ll
+++ b/test/CodeGen/XCore/2008-11-17-Shl64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; PR3080
define i64 @test(i64 %a) {
%result = shl i64 %a, 1
diff --git a/test/CodeGen/XCore/2009-01-08-Crash.ll b/test/CodeGen/XCore/2009-01-08-Crash.ll
index 6f5fb7c..a31ea1e 100644
--- a/test/CodeGen/XCore/2009-01-08-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-08-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
;; This caused a compilation failure since the
;; address arithmetic was folded into the LDWSP instruction,
;; resulting in a negative offset which eliminateFrameIndex was
diff --git a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
index b9333c9..b2bbcb1 100644
--- a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; PR3324
define double @f1(double %a, double %b, double %c, double %d, double %e, double %f, double %g) nounwind {
entry:
diff --git a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
index e834d66..a6b9699 100644
--- a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
+++ b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore
+; RUN: llc < %s -march=xcore
; PR3898
define i32 @vector_param(<2 x double> %x) nounwind {
diff --git a/test/CodeGen/XCore/2009-07-15-store192.ll b/test/CodeGen/XCore/2009-07-15-store192.ll
new file mode 100644
index 0000000..5278af8
--- /dev/null
+++ b/test/CodeGen/XCore/2009-07-15-store192.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=xcore > %t1.s
+define void @store32(i8* %p) nounwind {
+entry:
+ %0 = bitcast i8* %p to i192*
+ store i192 0, i192* %0, align 4
+ ret void
+}
diff --git a/test/CodeGen/XCore/addsub64.ll b/test/CodeGen/XCore/addsub64.ll
index 41224fc..a1494ad 100644
--- a/test/CodeGen/XCore/addsub64.ll
+++ b/test/CodeGen/XCore/addsub64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore -mcpu=xs1b-generic > %t1.s
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic > %t1.s
; RUN: grep ladd %t1.s | count 2
; RUN: grep lsub %t1.s | count 2
define i64 @add64(i64 %a, i64 %b) {
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
new file mode 100644
index 0000000..d585e8b
--- /dev/null
+++ b/test/CodeGen/XCore/ashr.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+define i32 @ashr(i32 %a, i32 %b) {
+ %1 = ashr i32 %a, %b
+ ret i32 %1
+}
+; CHECK: ashr:
+; CHECK-NEXT: ashr r0, r0, r1
+
+define i32 @ashri1(i32 %a) {
+ %1 = ashr i32 %a, 24
+ ret i32 %1
+}
+; CHECK: ashri1:
+; CHECK-NEXT: ashr r0, r0, 24
+
+define i32 @ashri2(i32 %a) {
+ %1 = ashr i32 %a, 31
+ ret i32 %1
+}
+; CHECK: ashri2:
+; CHECK-NEXT: ashr r0, r0, 32
+
+define i32 @f1(i32 %a) {
+ %1 = icmp slt i32 %a, 0
+ br i1 %1, label %less, label %not_less
+less:
+ ret i32 10
+not_less:
+ ret i32 17
+}
+; CHECK: f1:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
+
+define i32 @f2(i32 %a) {
+ %1 = icmp sge i32 %a, 0
+ br i1 %1, label %greater, label %not_greater
+greater:
+ ret i32 10
+not_greater:
+ ret i32 17
+}
+; CHECK: f2:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bt r0
+
+define i32 @f3(i32 %a) {
+ %1 = icmp slt i32 %a, 0
+ %2 = select i1 %1, i32 10, i32 17
+ ret i32 %2
+}
+; CHECK: f3:
+; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ldc r0, 10
+; CHECK-NEXT: bt r1
+; CHECK: ldc r0, 17
+
+define i32 @f4(i32 %a) {
+ %1 = icmp sge i32 %a, 0
+ %2 = select i1 %1, i32 10, i32 17
+ ret i32 %2
+}
+; CHECK: f4:
+; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ldc r0, 17
+; CHECK-NEXT: bt r1
+; CHECK: ldc r0, 10
+
+define i32 @f5(i32 %a) {
+ %1 = icmp sge i32 %a, 0
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+; CHECK: f5:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: eq r0, r0, 0
diff --git a/test/CodeGen/XCore/basictest.ll b/test/CodeGen/XCore/basictest.ll
index 803ffcb..de5eaff 100644
--- a/test/CodeGen/XCore/basictest.ll
+++ b/test/CodeGen/XCore/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore
+; RUN: llc < %s -march=xcore
define i32 @test(i32 %X) {
%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/XCore/bitrev.ll b/test/CodeGen/XCore/bitrev.ll
index 38f3948..09202d3 100644
--- a/test/CodeGen/XCore/bitrev.ll
+++ b/test/CodeGen/XCore/bitrev.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep bitrev %t1.s | count 1
declare i32 @llvm.xcore.bitrev(i32)
diff --git a/test/CodeGen/XCore/constants.ll b/test/CodeGen/XCore/constants.ll
new file mode 100644
index 0000000..95fa11e
--- /dev/null
+++ b/test/CodeGen/XCore/constants.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
+; CHECK: .LCPI1_0:
+; CHECK: .long 12345678
+; CHECK: f:
+; CHECK: ldw r0, cp[.LCPI1_0]
+define i32 @f() {
+entry:
+ ret i32 12345678
+}
diff --git a/test/CodeGen/XCore/cos.ll b/test/CodeGen/XCore/cos.ll
index 334f0d5..8211f85 100644
--- a/test/CodeGen/XCore/cos.ll
+++ b/test/CodeGen/XCore/cos.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl cosf" %t1.s | count 1
; RUN: grep "bl cos" %t1.s | count 2
declare double @llvm.cos.f64(double)
diff --git a/test/CodeGen/XCore/exp.ll b/test/CodeGen/XCore/exp.ll
index 8412e7a..d23d484 100644
--- a/test/CodeGen/XCore/exp.ll
+++ b/test/CodeGen/XCore/exp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl expf" %t1.s | count 1
; RUN: grep "bl exp" %t1.s | count 2
declare double @llvm.exp.f64(double)
diff --git a/test/CodeGen/XCore/exp2.ll b/test/CodeGen/XCore/exp2.ll
index a53b767..4c4d17f4 100644
--- a/test/CodeGen/XCore/exp2.ll
+++ b/test/CodeGen/XCore/exp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl exp2f" %t1.s | count 1
; RUN: grep "bl exp2" %t1.s | count 2
declare double @llvm.exp2.f64(double)
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index 3fb7b01..e3dd3dd 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
-; RUN: grep "xor" %t1.s | count 1
+; RUN: llc < %s -march=xcore | grep "xor" | count 1
define i1 @test(double %F) nounwind {
entry:
%0 = fsub double -0.000000e+00, %F
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index 810e8ad..ecab65c 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "get r11, id" %t1.s | count 1
declare i32 @llvm.xcore.getid()
diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll
new file mode 100644
index 0000000..342e5932
--- /dev/null
+++ b/test/CodeGen/XCore/globals.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+define i32 *@addr_G1() {
+entry:
+; CHECK: addr_G1:
+; CHECK: ldaw r0, dp[G1]
+ ret i32* @G1
+}
+
+define i32 *@addr_G2() {
+entry:
+; CHECK: addr_G2:
+; CHECK: ldaw r0, dp[G2]
+ ret i32* @G2
+}
+
+define i32 *@addr_G3() {
+entry:
+; CHECK: addr_G3:
+; CHECK: ldaw r11, cp[G3]
+; CHECK: mov r0, r11
+ ret i32* @G3
+}
+
+define i32 **@addr_G4() {
+entry:
+; CHECK: addr_G4:
+; CHECK: ldaw r0, dp[G4]
+ ret i32** @G4
+}
+
+define i32 **@addr_G5() {
+entry:
+; CHECK: addr_G5:
+; CHECK: ldaw r11, cp[G5]
+; CHECK: mov r0, r11
+ ret i32** @G5
+}
+
+define i32 **@addr_G6() {
+entry:
+; CHECK: addr_G6:
+; CHECK: ldaw r0, dp[G6]
+ ret i32** @G6
+}
+
+define i32 **@addr_G7() {
+entry:
+; CHECK: addr_G7:
+; CHECK: ldaw r11, cp[G7]
+; CHECK: mov r0, r11
+ ret i32** @G7
+}
+
+define i32 *@addr_G8() {
+entry:
+; CHECK: addr_G8:
+; CHECK: ldaw r0, dp[G8]
+ ret i32* @G8
+}
+
+@G1 = global i32 4712
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G1:
+
+@G2 = global i32 0
+; CHECK: .section .dp.bss,"awd",@nobits
+; CHECK: G2:
+
+@G3 = constant i32 9401
+; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
+; CHECK: G3:
+
+@G4 = global i32* @G1
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G4:
+
+@G5 = constant i32* @G1
+; CHECK: .section .cp.rodata,"ac",@progbits
+; CHECK: G5:
+
+@G6 = global i32* @G8
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G6:
+
+@G7 = constant i32* @G8
+; CHECK: .section .cp.rodata,"ac",@progbits
+; CHECK: G7:
+
+@G8 = internal global i32 9312
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G8:
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
new file mode 100644
index 0000000..adfea21
--- /dev/null
+++ b/test/CodeGen/XCore/load.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: not grep add %t1.s
+; RUN: not grep ldaw %t1.s
+; RUN: not grep lda16 %t1.s
+; RUN: not grep zext %t1.s
+; RUN: not grep sext %t1.s
+; RUN: grep "ldw" %t1.s | count 2
+; RUN: grep "ld16s" %t1.s | count 1
+; RUN: grep "ld8u" %t1.s | count 1
+
+define i32 @load32(i32* %p, i32 %offset) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 %offset
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+define i32 @load32_imm(i32* %p) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 11
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+define i32 @load16(i16* %p, i32 %offset) nounwind {
+entry:
+ %0 = getelementptr i16* %p, i32 %offset
+ %1 = load i16* %0, align 2
+ %2 = sext i16 %1 to i32
+ ret i32 %2
+}
+
+define i32 @load8(i8* %p, i32 %offset) nounwind {
+entry:
+ %0 = getelementptr i8* %p, i32 %offset
+ %1 = load i8* %0, align 1
+ %2 = zext i8 %1 to i32
+ ret i32 %2
+}
diff --git a/test/CodeGen/XCore/log.ll b/test/CodeGen/XCore/log.ll
index 88d9d7f..a08471f 100644
--- a/test/CodeGen/XCore/log.ll
+++ b/test/CodeGen/XCore/log.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl logf" %t1.s | count 1
; RUN: grep "bl log" %t1.s | count 2
declare double @llvm.log.f64(double)
diff --git a/test/CodeGen/XCore/log10.ll b/test/CodeGen/XCore/log10.ll
index f844d8f..a72b8bf 100644
--- a/test/CodeGen/XCore/log10.ll
+++ b/test/CodeGen/XCore/log10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl log10f" %t1.s | count 1
; RUN: grep "bl log10" %t1.s | count 2
declare double @llvm.log10.f64(double)
diff --git a/test/CodeGen/XCore/log2.ll b/test/CodeGen/XCore/log2.ll
index b8a3dbd..d257433 100644
--- a/test/CodeGen/XCore/log2.ll
+++ b/test/CodeGen/XCore/log2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl log2f" %t1.s | count 1
; RUN: grep "bl log2" %t1.s | count 2
declare double @llvm.log2.f64(double)
diff --git a/test/CodeGen/XCore/pow.ll b/test/CodeGen/XCore/pow.ll
index a7b6318..b461185 100644
--- a/test/CodeGen/XCore/pow.ll
+++ b/test/CodeGen/XCore/pow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl powf" %t1.s | count 1
; RUN: grep "bl pow" %t1.s | count 2
declare double @llvm.pow.f64(double, double)
diff --git a/test/CodeGen/XCore/powi.ll b/test/CodeGen/XCore/powi.ll
index 30e6d7e..de31cbe 100644
--- a/test/CodeGen/XCore/powi.ll
+++ b/test/CodeGen/XCore/powi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl __powidf2" %t1.s | count 1
; RUN: grep "bl __powisf2" %t1.s | count 1
declare double @llvm.powi.f64(double, i32)
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 888ccdf..9a2f5b3 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -1,6 +1,6 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llvm-as < %s | llc -march=xcore > %t
+; RUN: llc < %s -march=xcore > %t
; RUN: grep .Lfoo: %t
; RUN: grep bl.*\.Lfoo %t
; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/XCore/sext.ll b/test/CodeGen/XCore/sext.ll
new file mode 100644
index 0000000..9cd4ad6
--- /dev/null
+++ b/test/CodeGen/XCore/sext.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+define i32 @sext1(i32 %a) {
+ %1 = trunc i32 %a to i1
+ %2 = sext i1 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext1:
+; CHECK: sext r0, 1
+
+define i32 @sext2(i32 %a) {
+ %1 = trunc i32 %a to i2
+ %2 = sext i2 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext2:
+; CHECK: sext r0, 2
+
+define i32 @sext8(i32 %a) {
+ %1 = trunc i32 %a to i8
+ %2 = sext i8 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext8:
+; CHECK: sext r0, 8
+
+define i32 @sext16(i32 %a) {
+ %1 = trunc i32 %a to i16
+ %2 = sext i16 %1 to i32
+ ret i32 %2
+}
+; CHECK: sext16:
+; CHECK: sext r0, 16
diff --git a/test/CodeGen/XCore/sin.ll b/test/CodeGen/XCore/sin.ll
index 41aab67..ced026f 100644
--- a/test/CodeGen/XCore/sin.ll
+++ b/test/CodeGen/XCore/sin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl sinf" %t1.s | count 1
; RUN: grep "bl sin" %t1.s | count 2
declare double @llvm.sin.f64(double)
diff --git a/test/CodeGen/XCore/sqrt.ll b/test/CodeGen/XCore/sqrt.ll
index 221d1ac..364d1a1 100644
--- a/test/CodeGen/XCore/sqrt.ll
+++ b/test/CodeGen/XCore/sqrt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "bl sqrtf" %t1.s | count 1
; RUN: grep "bl sqrt" %t1.s | count 2
declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
new file mode 100644
index 0000000..2213743
--- /dev/null
+++ b/test/CodeGen/XCore/store.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: not grep add %t1.s
+; RUN: not grep ldaw %t1.s
+; RUN: not grep lda16 %t1.s
+; RUN: grep "stw" %t1.s | count 2
+; RUN: grep "st16" %t1.s | count 1
+; RUN: grep "st8" %t1.s | count 1
+
+define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 %offset
+ store i32 %val, i32* %0, align 4
+ ret void
+}
+
+define void @store32_imm(i32* %p, i32 %val) nounwind {
+entry:
+ %0 = getelementptr i32* %p, i32 11
+ store i32 %val, i32* %0, align 4
+ ret void
+}
+
+define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
+entry:
+ %0 = getelementptr i16* %p, i32 %offset
+ store i16 %val, i16* %0, align 2
+ ret void
+}
+
+define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
+entry:
+ %0 = getelementptr i8* %p, i32 %offset
+ store i8 %val, i8* %0, align 1
+ ret void
+}
diff --git a/test/CodeGen/XCore/tls.ll b/test/CodeGen/XCore/tls.ll
new file mode 100644
index 0000000..ed41afa
--- /dev/null
+++ b/test/CodeGen/XCore/tls.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+define i32 *@addr_G() {
+entry:
+; CHECK: addr_G:
+; CHECK: get r11, id
+ ret i32* @G
+}
+
+@G = thread_local global i32 15
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G:
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index b3d3bc2..45f886d 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
; RUN: grep "ecallf" %t1.s | count 1
; RUN: grep "ldc" %t1.s | count 1
define i32 @test() noreturn nounwind {
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
new file mode 100644
index 0000000..0ee8e1c
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl __misaligned_load" %t1.s | count 1
+; RUN: grep ld16s %t1.s | count 2
+; RUN: grep ldw %t1.s | count 2
+; RUN: grep shl %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+; RUN: grep zext %t1.s | count 1
+; RUN: grep "or " %t1.s | count 2
+
+; Byte aligned load. Expands to call to __misaligned_load.
+define i32 @align1(i32* %p) nounwind {
+entry:
+ %0 = load i32* %p, align 1 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+; Half word aligned load. Expands to two 16bit loads.
+define i32 @align2(i32* %p) nounwind {
+entry:
+ %0 = load i32* %p, align 2 ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+@a = global [5 x i8] zeroinitializer, align 4
+
+; Constant offset from word aligned base. Expands to two 32bit loads.
+define i32 @align3() nounwind {
+entry:
+ %0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
+ ret i32 %0
+}
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
new file mode 100644
index 0000000..62078e6
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl __misaligned_store" %t1.s | count 1
+; RUN: grep st16 %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+
+; Byte aligned store. Expands to call to __misaligned_store.
+define void @align1(i32* %p, i32 %val) nounwind {
+entry:
+ store i32 %val, i32* %p, align 1
+ ret void
+}
+
+; Half word aligned store. Expands to two 16bit stores.
+define void @align2(i32* %p, i32 %val) nounwind {
+entry:
+ store i32 %val, i32* %p, align 2
+ ret void
+}
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
new file mode 100644
index 0000000..493ca6a
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl memmove" %t1.s | count 1
+; RUN: grep "ldc r., 8" %t1.s | count 1
+
+; Unaligned load / store pair. Should be combined into a memmove
+; of size 8
+define void @f(i64* %dst, i64* %src) nounwind {
+entry:
+ %0 = load i64* %src, align 1
+ store i64 %0, i64* %dst, align 1
+ ret void
+}