Diffstat (limited to 'test/CodeGen/PowerPC')
39 files changed, 2123 insertions, 40 deletions
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll index 0003a17..b95ac68 100644 --- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll +++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll @@ -9,9 +9,8 @@ target triple = "powerpc-apple-darwin11.0" define void @foo() nounwind ssp { entry: -; Better: mtctr r12 -; CHECK: mr r12, [[REG:r[0-9]+]] -; CHECK: mtctr [[REG]] +; CHECK: mtctr r12 +; CHECK: bctrl %0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1] call void (...)* %0() nounwind br label %return diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll new file mode 100644 index 0000000..9d2e390 --- /dev/null +++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; This test checks that the TOC entry symbol names won't clash with the +; global .LC0 and .LC2 symbols defined in the module. + +@.LC0 = internal global [5 x i8] c".LC0\00" +@.LC2 = internal global [5 x i8] c".LC2\00" + +define i32 @foo(double %X, double %Y) nounwind readnone { + ; The 1.0 and 3.0 constants generate two TOC entries + %cmp = fcmp oeq double %X, 1.000000e+00 + %conv = zext i1 %cmp to i32 + %cmp1 = fcmp oeq double %Y, 3.000000e+00 + %conv2 = zext i1 %cmp1 to i32 + %add = add nsw i32 %conv2, %conv + ret i32 %add +} + +; Check the creation of 2 .tc entries for both double constants. They +; should be .LC1 and .LC3 to avoid a name clash with the global constants +; .LC0 and .LC2 +; CHECK: .LC{{[13]}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK: .LC{{[13]}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} diff --git a/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll new file mode 100644 index 0000000..41533a8 --- /dev/null +++ b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @test(i64 %n) nounwind { +entry: + %0 = alloca i8, i64 %n, align 1 + %1 = alloca i8, i64 %n, align 1 + call void @use(i8* %0, i8* %1) nounwind + ret void +} + +declare void @use(i8*, i8*) + +; Check that we actually have two instances of dynamic stack allocation, +; identified by the stdux used to update the back-chain link.
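For reference, a C-level analogue of the dynalloc test above (an editor's sketch, not code from the patch; the helper 'use' mirrors the IR declaration): two variable-length arrays force two separate dynamic stack allocations, and on powerpc64 each one updates the back-chain link with an stdux.

/* Two VLAs -> two dynamic stack allocations, each emitting an stdux. */
void use(char *, char *);

void test(long n) {
  char a[n];   /* first dynamic allocation  */
  char b[n];   /* second dynamic allocation */
  use(a, b);
}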
+; CHECK: stdux +; CHECK: stdux diff --git a/test/CodeGen/PowerPC/2012-10-12-bitcast.ll b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll new file mode 100644 index 0000000..f841c5f --- /dev/null +++ b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll @@ -0,0 +1,20 @@ +; RUN: llc -mattr=+altivec < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @test(<16 x i8> %v) nounwind { +entry: + %0 = bitcast <16 x i8> %v to i128 + %1 = lshr i128 %0, 96 + %2 = trunc i128 %1 to i32 + ret i32 %2 +} + +; Verify that bitcast handles big-endian platforms correctly +; by checking we load the result from the correct offset + +; CHECK: addi [[REGISTER:[0-9]+]], 1, -16 +; CHECK: stvx 2, 0, [[REGISTER]] +; CHECK: lwz 3, -16(1) +; CHECK: blr + diff --git a/test/CodeGen/PowerPC/asm-Zy.ll b/test/CodeGen/PowerPC/asm-Zy.ll new file mode 100644 index 0000000..691165f --- /dev/null +++ b/test/CodeGen/PowerPC/asm-Zy.ll @@ -0,0 +1,14 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s + +define i32 @zytest(i32 %a) nounwind { +entry: +; CHECK: @zytest + %r = call i32 asm "lwbrx $0, ${1:y}", "=r,Z"(i32 %a) nounwind, !srcloc !0 + ret i32 %r +; CHECK: lwbrx 3, 0, +} + +!0 = metadata !{i32 101688} + diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll index 9a456b6..638059a 100644 --- a/test/CodeGen/PowerPC/big-endian-formal-args.ll +++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll @@ -2,10 +2,10 @@ declare void @bar(i64 %x, i64 %y) -; CHECK: li {{[53]}}, 0 +; CHECK: li 3, 0 ; CHECK: li 4, 2 +; CHECK: li 5, 0 ; CHECK: li 6, 3 -; CHECK: mr {{[53]}}, {{[53]}} define void @foo() { call void @bar(i64 2, i64 3) diff --git a/test/CodeGen/PowerPC/bl8_elf_nop.ll b/test/CodeGen/PowerPC/bl8_elf_nop.ll deleted file mode 100644 index 386c59e..0000000 --- a/test/CodeGen/PowerPC/bl8_elf_nop.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -declare i32 @clock() nounwind - -define i32 @func() { -entry: - %call = call i32 @clock() nounwind - %call2 = add i32 %call, 7 - ret i32 %call2 -} - -; CHECK: bl clock -; CHECK-NEXT: nop - diff --git a/test/CodeGen/PowerPC/coalesce-ext.ll b/test/CodeGen/PowerPC/coalesce-ext.ll index cc80f83..f19175c 100644 --- a/test/CodeGen/PowerPC/coalesce-ext.ll +++ b/test/CodeGen/PowerPC/coalesce-ext.ll @@ -13,5 +13,6 @@ define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { store volatile i32 %D, i32* %P ; Reuse low bits of extended register, don't extend live range of SUM. 
; CHECK: stw [[EXT]] - ret i32 %D + %R = add i32 %D, %D + ret i32 %R } diff --git a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll new file mode 100644 index 0000000..afa1ea8 --- /dev/null +++ b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" + +@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1 + +define void @test(i32 %count) nounwind { +entry: +; CHECK: crxor 6, 6, 6 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind + %cmp2 = icmp sgt i32 %count, 0 + br i1 %cmp2, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] +; CHECK: crxor 6, 6, 6 + %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind + %inc = add nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll new file mode 100644 index 0000000..3e98dbd --- /dev/null +++ b/test/CodeGen/PowerPC/crsave.ll @@ -0,0 +1,49 @@ +; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32 +; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64 + +declare void @foo() + +define i32 @test_cr2() nounwind { +entry: + %ret = alloca i32, align 4 + %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind + store i32 %0, i32* %ret, align 4 + call void @foo() + %1 = load i32* %ret, align 4 + ret i32 %1 +} + +; PPC32: mfcr 12 +; PPC32-NEXT: stw 12, {{[0-9]+}}(31) +; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: mtcrf 32, 12 + +; PPC64: mfcr 12 +; PPC64-NEXT: stw 12, 8(1) +; PPC64: lwz 12, 8(1) +; PPC64-NEXT: mtcrf 32, 12 + +define i32 @test_cr234() nounwind { +entry: + %ret = alloca i32, align 4 + %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09cmp 3,$2,$2\0A\09cmp 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind + store i32 %0, i32* %ret, align 4 + call void @foo() + %1 = load i32* %ret, align 4 + ret i32 %1 +} + +; PPC32: mfcr 12 +; PPC32-NEXT: stw 12, {{[0-9]+}}(31) +; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: mtcrf 32, 12 +; PPC32-NEXT: mtcrf 16, 12 +; PPC32-NEXT: mtcrf 8, 12 + +; PPC64: mfcr 12 +; PPC64-NEXT: stw 12, 8(1) +; PPC64: lwz 12, 8(1) +; PPC64-NEXT: mtcrf 32, 12 +; PPC64-NEXT: mtcrf 16, 12 +; PPC64-NEXT: mtcrf 8, 12 + diff --git a/test/CodeGen/PowerPC/emptystruct.ll b/test/CodeGen/PowerPC/emptystruct.ll new file mode 100644 index 0000000..36b4abd --- /dev/null +++ b/test/CodeGen/PowerPC/emptystruct.ll @@ -0,0 +1,51 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; This tests correct handling of empty aggregate parameters and return values. +; An empty parameter passed by value does not consume a protocol register or +; a parameter save area doubleword. An empty parameter passed by reference +; is treated as any other pointer parameter. 
An empty aggregate return value +; is treated as any other aggregate return value, passed via address as a +; hidden parameter in GPR3. In this example, GPR3 contains the return value +; address, GPR4 contains the address of e2, and e1 and e3 are not passed or +; received. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.empty = type {} + +define void @callee(%struct.empty* noalias sret %agg.result, %struct.empty* byval %a1, %struct.empty* %a2, %struct.empty* byval %a3) nounwind { +entry: + %a2.addr = alloca %struct.empty*, align 8 + store %struct.empty* %a2, %struct.empty** %a2.addr, align 8 + %0 = load %struct.empty** %a2.addr, align 8 + %1 = bitcast %struct.empty* %agg.result to i8* + %2 = bitcast %struct.empty* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 0, i32 1, i1 false) + ret void +} + +; CHECK: callee: +; CHECK: std 4, +; CHECK: std 3, +; CHECK-NOT: std 5, +; CHECK-NOT: std 6, +; CHECK: blr + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define void @caller(%struct.empty* noalias sret %agg.result) nounwind { +entry: + %e1 = alloca %struct.empty, align 1 + %e2 = alloca %struct.empty, align 1 + %e3 = alloca %struct.empty, align 1 + call void @callee(%struct.empty* sret %agg.result, %struct.empty* byval %e1, %struct.empty* %e2, %struct.empty* byval %e3) + ret void +} + +; CHECK: caller: +; CHECK: addi 4, +; CHECK: std 3, +; CHECK-NOT: std 5, +; CHECK-NOT: std 6, +; CHECK: bl callee diff --git a/test/CodeGen/PowerPC/floatPSA.ll b/test/CodeGen/PowerPC/floatPSA.ll new file mode 100644 index 0000000..b5631a1 --- /dev/null +++ b/test/CodeGen/PowerPC/floatPSA.ll @@ -0,0 +1,97 @@ +; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s + +; This verifies that single-precision floating point values that can't +; be passed in registers are stored in the rightmost word of the parameter +; save area slot. There are 13 architected floating-point registers, so +; the 14th is passed in storage. The address of the 14th argument is +; 48 (fixed size of the linkage area) + 13 * 8 (first 13 args) + 4 +; (offset to second word) = 156. 
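The offset arithmetic above can be sanity-checked with a few lines of C (an illustration of the computation only; the 48-byte linkage area and 8-byte parameter doublewords are the 64-bit ELF ABI values stated in the comment):

#include <stdio.h>

int main(void) {
  int linkage = 48;      /* fixed size of the linkage area        */
  int first13 = 13 * 8;  /* one doubleword for each of args 1-13  */
  int word2   = 4;       /* offset to the rightmost (second) word */
  printf("%d\n", linkage + first13 + word2);  /* prints 156 */
  return 0;
}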
+ +define float @bar(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m, float %n) nounwind { +entry: + %a.addr = alloca float, align 4 + %b.addr = alloca float, align 4 + %c.addr = alloca float, align 4 + %d.addr = alloca float, align 4 + %e.addr = alloca float, align 4 + %f.addr = alloca float, align 4 + %g.addr = alloca float, align 4 + %h.addr = alloca float, align 4 + %i.addr = alloca float, align 4 + %j.addr = alloca float, align 4 + %k.addr = alloca float, align 4 + %l.addr = alloca float, align 4 + %m.addr = alloca float, align 4 + %n.addr = alloca float, align 4 + store float %a, float* %a.addr, align 4 + store float %b, float* %b.addr, align 4 + store float %c, float* %c.addr, align 4 + store float %d, float* %d.addr, align 4 + store float %e, float* %e.addr, align 4 + store float %f, float* %f.addr, align 4 + store float %g, float* %g.addr, align 4 + store float %h, float* %h.addr, align 4 + store float %i, float* %i.addr, align 4 + store float %j, float* %j.addr, align 4 + store float %k, float* %k.addr, align 4 + store float %l, float* %l.addr, align 4 + store float %m, float* %m.addr, align 4 + store float %n, float* %n.addr, align 4 + %0 = load float* %n.addr, align 4 + ret float %0 +} + +; CHECK: lfs {{[0-9]+}}, 156(1) + +define float @foo() nounwind { +entry: + %a = alloca float, align 4 + %b = alloca float, align 4 + %c = alloca float, align 4 + %d = alloca float, align 4 + %e = alloca float, align 4 + %f = alloca float, align 4 + %g = alloca float, align 4 + %h = alloca float, align 4 + %i = alloca float, align 4 + %j = alloca float, align 4 + %k = alloca float, align 4 + %l = alloca float, align 4 + %m = alloca float, align 4 + %n = alloca float, align 4 + store float 1.000000e+00, float* %a, align 4 + store float 2.000000e+00, float* %b, align 4 + store float 3.000000e+00, float* %c, align 4 + store float 4.000000e+00, float* %d, align 4 + store float 5.000000e+00, float* %e, align 4 + store float 6.000000e+00, float* %f, align 4 + store float 7.000000e+00, float* %g, align 4 + store float 8.000000e+00, float* %h, align 4 + store float 9.000000e+00, float* %i, align 4 + store float 1.000000e+01, float* %j, align 4 + store float 1.100000e+01, float* %k, align 4 + store float 1.200000e+01, float* %l, align 4 + store float 1.300000e+01, float* %m, align 4 + store float 1.400000e+01, float* %n, align 4 + %0 = load float* %a, align 4 + %1 = load float* %b, align 4 + %2 = load float* %c, align 4 + %3 = load float* %d, align 4 + %4 = load float* %e, align 4 + %5 = load float* %f, align 4 + %6 = load float* %g, align 4 + %7 = load float* %h, align 4 + %8 = load float* %i, align 4 + %9 = load float* %j, align 4 + %10 = load float* %k, align 4 + %11 = load float* %l, align 4 + %12 = load float* %m, align 4 + %13 = load float* %n, align 4 + %call = call float @bar(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) + ret float %call +} + +; Note that stw is used instead of stfs because the value is a simple +; constant that can be created with a load-immediate in a GPR. +; CHECK: stw {{[0-9]+}}, 156(1) + diff --git a/test/CodeGen/PowerPC/fsl-e500mc.ll b/test/CodeGen/PowerPC/fsl-e500mc.ll new file mode 100644 index 0000000..09b7e41 --- /dev/null +++ b/test/CodeGen/PowerPC/fsl-e500mc.ll @@ -0,0 +1,22 @@ +; +; Test support for Freescale e500mc and its higher memcpy inlining thresholds. 
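A C-level analogue of the e500mc memcpy test that follows (an editor's sketch; the layout matches %struct.teststruct, 12 ints plus one more int = 52 bytes): a plain struct assignment lowers to an llvm.memcpy of 52 bytes, which the e500mc's higher inlining threshold should expand inline instead of emitting a call to memcpy.

struct teststruct { int a[12]; int b; };  /* 52 bytes */

void copy(struct teststruct *out, const struct teststruct *in) {
  *out = *in;  /* becomes a 52-byte memcpy; expected to be inlined */
}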
+; +; RUN: llc -mcpu=e500mc < %s 2>&1 | FileCheck %s +; CHECK-NOT: not a recognized processor for this target + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-fsl-linux" + +%struct.teststruct = type { [12 x i32], i32 } + +define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind { +entry: +; CHECK: @copy +; CHECK-NOT: bl memcpy + %0 = bitcast %struct.teststruct* %agg.result to i8* + %1 = bitcast %struct.teststruct* %in to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 52, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/CodeGen/PowerPC/fsl-e5500.ll b/test/CodeGen/PowerPC/fsl-e5500.ll new file mode 100644 index 0000000..d47d8c8 --- /dev/null +++ b/test/CodeGen/PowerPC/fsl-e5500.ll @@ -0,0 +1,22 @@ +; +; Test support for Freescale e5500 and its higher memcpy inlining thresholds. +; +; RUN: llc -mcpu=e5500 < %s 2>&1 | FileCheck %s +; CHECK-NOT: not a recognized processor for this target + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-fsl-linux" + +%struct.teststruct = type { [24 x i32], i32 } + +define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind { +entry: +; CHECK: @copy +; CHECK-NOT: bl memcpy + %0 = bitcast %struct.teststruct* %agg.result to i8* + %1 = bitcast %struct.teststruct* %in to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 100, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll new file mode 100644 index 0000000..5a0c072 --- /dev/null +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @test(i64 %x) nounwind readnone { +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + +; Verify that we get the code sequence needed to avoid double-rounding. +; Note that only parts of the sequence are checked for here, to allow +; for minor code generation differences. + +; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53 +; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1 +; CHECK: cmpldi 0, [[REGISTER]], 1 +; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1 +; CHECK: std [[REGISTER]], -{{[0-9]+}}(1) + + +; Also check that with -enable-unsafe-fp-math we do not get that extra +; code sequence. Simply verify that there is no "isel" present. 
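To illustrate the double-rounding hazard that the isel sequence above guards against, consider this small C program (the constant is an editor-chosen example, not taken from the test): converting a 64-bit integer to float through an intermediate double can round twice and give a different result than a single correctly-rounded conversion.

#include <stdio.h>

int main(void) {
  long long x = (1LL << 60) + (1LL << 36) + 1;
  float direct = (float)x;          /* one correctly-rounded conversion */
  float twice  = (float)(double)x;  /* rounds to double, then to float  */
  /* On a conforming implementation: 0x1.000002p+60 vs 0x1p+60. */
  printf("%a\n%a\n", (double)direct, (double)twice);
  return 0;
}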
+ +; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; UNSAFE-NOT: isel + diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll index e1ff82d..59c3388 100644 --- a/test/CodeGen/PowerPC/inlineasm-copy.ll +++ b/test/CodeGen/PowerPC/inlineasm-copy.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=ppc32 | not grep mr +; RUN: llc < %s -march=ppc32 -verify-machineinstrs | FileCheck %s +; CHECK-NOT: mr define i32 @test(i32 %Y, i32 %X) { entry: %tmp = tail call i32 asm "foo $0", "=r"( ) ; <i32> [#uses=1] @@ -12,3 +13,9 @@ entry: ret i32 %tmp1 } +; CHECK: test3 +define i32 @test3(i32 %Y, i32 %X) { +entry: + %tmp1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "foo $0, $1", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19"( i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y ) ; <i32> [#uses=1] + ret i32 1 +} diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll index 6c82723..d2887b9 100644 --- a/test/CodeGen/PowerPC/int-fp-conv-1.ll +++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=ppc64 | grep __floatditf +; RUN: llc < %s -march=ppc64 | FileCheck %s +; CHECK-NOT: __floatditf define i64 @__fixunstfdi(ppc_fp128 %a) nounwind { entry: diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll new file mode 100644 index 0000000..62aa7cf --- /dev/null +++ b/test/CodeGen/PowerPC/jaggedstructs.ll @@ -0,0 +1,48 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; This tests receiving and re-passing parameters consisting of structures +; of size 3, 5, 6, and 7. They are to be found/placed right-adjusted in
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S3 = type { [3 x i8] } +%struct.S5 = type { [5 x i8] } +%struct.S6 = type { [6 x i8] } +%struct.S7 = type { [7 x i8] } + +define void @test(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) nounwind { +entry: + call void @check(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) + ret void +} + +; CHECK: std 6, 216(1) +; CHECK: std 5, 208(1) +; CHECK: std 4, 200(1) +; CHECK: std 3, 192(1) +; CHECK: lbz {{[0-9]+}}, 199(1) +; CHECK: stb {{[0-9]+}}, 55(1) +; CHECK: lhz {{[0-9]+}}, 197(1) +; CHECK: sth {{[0-9]+}}, 53(1) +; CHECK: lbz {{[0-9]+}}, 207(1) +; CHECK: stb {{[0-9]+}}, 63(1) +; CHECK: lwz {{[0-9]+}}, 203(1) +; CHECK: stw {{[0-9]+}}, 59(1) +; CHECK: lhz {{[0-9]+}}, 214(1) +; CHECK: sth {{[0-9]+}}, 70(1) +; CHECK: lwz {{[0-9]+}}, 210(1) +; CHECK: stw {{[0-9]+}}, 66(1) +; CHECK: lbz {{[0-9]+}}, 223(1) +; CHECK: stb {{[0-9]+}}, 79(1) +; CHECK: lhz {{[0-9]+}}, 221(1) +; CHECK: sth {{[0-9]+}}, 77(1) +; CHECK: lwz {{[0-9]+}}, 217(1) +; CHECK: stw {{[0-9]+}}, 73(1) +; CHECK: ld 6, 72(1) +; CHECK: ld 5, 64(1) +; CHECK: ld 4, 56(1) +; CHECK: ld 3, 48(1) + +declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval) diff --git a/test/CodeGen/PowerPC/misched.ll b/test/CodeGen/PowerPC/misched.ll new file mode 100644 index 0000000..d6fb3b3 --- /dev/null +++ b/test/CodeGen/PowerPC/misched.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -enable-misched -verify-machineinstrs +; PR14302 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +@b = external global [16000 x double], align 32 + +define void @pr14302() nounwind { +entry: + tail call void @putchar() nounwind + br label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.body24.i + +for.body24.i: ; preds = %for.body24.i, %for.body + store double 1.000000e+00, double* undef, align 8 + br i1 undef, label %for.body24.i58, label %for.body24.i + +for.body24.i58: ; preds = %for.body24.i58, %for.body24.i + %arrayidx26.i55.1 = getelementptr inbounds [16000 x double]* @b, i64 0, i64 undef + store double 1.000000e+00, double* %arrayidx26.i55.1, align 8 + br i1 undef, label %for.body24.i64, label %for.body24.i58 + +for.body24.i64: ; preds = %for.body24.i64, %for.body24.i58 + %exitcond.2489 = icmp eq i32 0, 16000 + br i1 %exitcond.2489, label %for.body24.i70, label %for.body24.i64 + +for.body24.i70: ; preds = %for.body24.i70, %for.body24.i64 + br i1 undef, label %for.body24.i76, label %for.body24.i70 + +for.body24.i76: ; preds = %for.body24.i76, %for.body24.i70 + br i1 undef, label %set1d.exit77, label %for.body24.i76 + +set1d.exit77: ; preds = %for.body24.i76 + br label %for.body29 + +for.body29: ; preds = %for.body29, %set1d.exit77 + br i1 undef, label %for.end35, label %for.body29 + +for.end35: ; preds = %for.body29 + ret void +} + +declare void @putchar() diff --git a/test/CodeGen/PowerPC/novrsave.ll b/test/CodeGen/PowerPC/novrsave.ll new file mode 100644 index 0000000..a70576a --- /dev/null +++ b/test/CodeGen/PowerPC/novrsave.ll @@ -0,0 +1,15 @@ +; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s + +; This verifies that 
the code to update VRSAVE has been removed for SVR4. + +define <4 x float> @bar(<4 x float> %v) nounwind { +entry: + %v.addr = alloca <4 x float>, align 16 + store <4 x float> %v, <4 x float>* %v.addr, align 16 + %0 = load <4 x float>* %v.addr, align 16 + ret <4 x float> %0 +} + +; CHECK-NOT: mfspr +; CHECK-NOT: mtspr diff --git a/test/CodeGen/PowerPC/ppc64-abi-extend.ll b/test/CodeGen/PowerPC/ppc64-abi-extend.ll new file mode 100644 index 0000000..8baf1c6 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-abi-extend.ll @@ -0,0 +1,97 @@ +; Verify that i32 argument/return values are extended to i64 + +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@si = common global i32 0, align 4 +@ui = common global i32 0, align 4 + +declare void @arg_si(i32 signext) +declare void @arg_ui(i32 zeroext) + +declare signext i32 @ret_si() +declare zeroext i32 @ret_ui() + +define void @pass_arg_si() nounwind { +entry: + %0 = load i32* @si, align 4 + tail call void @arg_si(i32 signext %0) nounwind + ret void +} +; CHECK: @pass_arg_si +; CHECK: lwa 3, +; CHECK: bl arg_si + +define void @pass_arg_ui() nounwind { +entry: + %0 = load i32* @ui, align 4 + tail call void @arg_ui(i32 zeroext %0) nounwind + ret void +} +; CHECK: @pass_arg_ui +; CHECK: lwz 3, +; CHECK: bl arg_ui + +define i64 @use_arg_si(i32 signext %x) nounwind readnone { +entry: + %conv = sext i32 %x to i64 + ret i64 %conv +} +; CHECK: @use_arg_si +; CHECK: %entry +; CHECK-NEXT: blr + +define i64 @use_arg_ui(i32 zeroext %x) nounwind readnone { +entry: + %conv = zext i32 %x to i64 + ret i64 %conv +} +; CHECK: @use_arg_ui +; CHECK: %entry +; CHECK-NEXT: blr + +define signext i32 @pass_ret_si() nounwind readonly { +entry: + %0 = load i32* @si, align 4 + ret i32 %0 +} +; CHECK: @pass_ret_si +; CHECK: lwa 3, +; CHECK: blr + +define zeroext i32 @pass_ret_ui() nounwind readonly { +entry: + %0 = load i32* @ui, align 4 + ret i32 %0 +} +; CHECK: @pass_ret_ui +; CHECK: lwz 3, +; CHECK: blr + +define i64 @use_ret_si() nounwind { +entry: + %call = tail call signext i32 @ret_si() nounwind + %conv = sext i32 %call to i64 + ret i64 %conv +} +; CHECK: @use_ret_si +; CHECK: bl ret_si +; This is to verify that the return register (3) set up by the ret_si +; call is passed on unmodified as the return value of use_ret_si. +; CHECK-NOT: 3 +; CHECK: blr + +define i64 @use_ret_ui() nounwind { +entry: + %call = tail call zeroext i32 @ret_ui() nounwind + %conv = zext i32 %call to i64 + ret i64 %conv +} +; CHECK: @use_ret_ui +; CHECK: bl ret_ui +; This is to verify that the return register (3) set up by the ret_ui +; call is passed on unmodified as the return value of use_ret_ui. +; CHECK-NOT: 3 +; CHECK: blr + diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll new file mode 100644 index 0000000..10b70d0 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; Verify internal alignment of long double in a struct. The double +; argument arrives in GPR3; GPR4 is skipped; GPRs 5 and 6 contain +; the long double. Check that these are stored to the proper locations +; in the parameter save area and loaded from there for return in FPR1/2.
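A hypothetical C equivalent of the %struct.S test below (ppc_fp128 corresponds to the 128-bit IBM long double on powerpc64-linux; it is the 16-byte alignment of the second member that forces GPR4 to be skipped):

struct S {
  double a;       /* arrives in GPR3                                  */
  long double b;  /* arrives in GPR5/GPR6; GPR4 skipped for alignment */
};

long double test(struct S x) {  /* result returned in FPR1/FPR2 */
  return x.b;
}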
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S = type { double, ppc_fp128 } + +define ppc_fp128 @test(%struct.S* byval %x) nounwind { +entry: + %b = getelementptr inbounds %struct.S* %x, i32 0, i32 1 + %0 = load ppc_fp128* %b, align 16 + ret ppc_fp128 %0 +} + +; CHECK: std 6, 72(1) +; CHECK: std 5, 64(1) +; CHECK: std 4, 56(1) +; CHECK: std 3, 48(1) +; CHECK: lfd 1, 64(1) +; CHECK: lfd 2, 72(1) + diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll new file mode 100644 index 0000000..c382edbb --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -0,0 +1,63 @@ +; RUN: llc < %s -march=ppc64 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @foo() nounwind readnone noinline { + ret void +} + +define weak void @foo_weak() nounwind { + ret void +} + +; Calls to a local function do not require the TOC restore 'nop' +define void @test_direct() nounwind readnone { +; CHECK: test_direct: + tail call void @foo() nounwind +; CHECK: bl foo +; CHECK-NOT: nop + ret void +} + +; Calls to a weak function require a TOC restore 'nop' because it +; may be overridden in a different module. +define void @test_weak() nounwind readnone { +; CHECK: test_weak: + tail call void @foo_weak() nounwind +; CHECK: bl foo +; CHECK-NEXT: nop + ret void +} + +; Indirect calls require full stub creation +define void @test_indirect(void ()* nocapture %fp) nounwind { +; CHECK: test_indirect: + tail call void %fp() nounwind +; CHECK: ld [[FP:[0-9]+]], 0(3) +; CHECK: ld 11, 16(3) +; CHECK: ld 2, 8(3) +; CHECK-NEXT: mtctr [[FP]] +; CHECK-NEXT: bctrl +; CHECK-NEXT: ld 2, 40(1) + ret void +} + +; Calls to absolute addresses should also have the TOC restore 'nop' +define void @test_abs() nounwind { +; CHECK: test_abs: + tail call void inttoptr (i64 1024 to void ()*)() nounwind +; CHECK: bla 1024 +; CHECK-NEXT: nop + ret void +} + +declare double @sin(double) nounwind + +; Calls to external functions should also have a 'nop' +define double @test_external(double %x) nounwind { +; CHECK: test_external: + %call = tail call double @sin(double %x) nounwind +; CHECK: bl sin +; CHECK-NEXT: nop + ret double %call +} diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll deleted file mode 100644 index d5c4d46..0000000 --- a/test/CodeGen/PowerPC/ppc64-ind-call.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc < %s -march=ppc64 | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -define void @test1() { -entry: - %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10) - unreachable -} - -; CHECK: @test1 -; CHECK: ld 11, 0(3) -; CHECK: ld 2, 8(3) -; CHECK: bctrl -; CHECK: ld 2, 40(1) - diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll index e5aa1f1..e1d50ba 100644 --- a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll +++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll @@ -5,6 +5,7 @@ ; CHECK-NEXT: .align 3 ; CHECK-NEXT: .quad .L.test1 ; CHECK-NEXT: .quad .TOC.@tocbase +; CHECK-NEXT: .quad 0 ; CHECK-NEXT: .text ; CHECK-NEXT: .L.test1: diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll
b/test/CodeGen/PowerPC/ppc64-toc.ll new file mode 100644 index 0000000..a29bdcb --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-toc.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@double_array = global [32 x double] zeroinitializer, align 8 +@number64 = global i64 10, align 8 +@internal_static_var.x = internal unnamed_addr global i64 0, align 8 + +define i64 @access_int64(i64 %a) nounwind readonly { +entry: +; CHECK: access_int64: +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .quad .L.access_int64 +; CHECK-NEXT: .quad .TOC.@tocbase +; CHECK-NEXT: .quad 0 +; CHECK-NEXT: .text + %0 = load i64* @number64, align 8 +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %cmp = icmp eq i64 %0, %a + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @internal_static_var(i64 %a) nounwind { +entry: +; CHECK: internal_static_var: +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %0 = load i64* @internal_static_var.x, align 8 + %cmp = icmp eq i64 %0, %a + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i32 @access_double(double %a) nounwind readnone { +entry: +; CHECK: access_double: +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %cmp = fcmp oeq double %a, 2.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + +define i32 @access_double_array(double %a, i32 %i) nounwind readonly { +entry: +; CHECK: access_double_array: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds [32 x double]* @double_array, i64 0, i64 %idxprom + %0 = load double* %arrayidx, align 8 +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2) + %cmp = fcmp oeq double %0, %a + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; Check the creation of 4 .tc entries: +; * int64_t global 'number64' +; * double constant 2.0 +; * double array 'double_array' +; * static int64_t 'x' accessed within '@internal_static_var' +; CHECK: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK-NEXT: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK-NEXT: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} +; CHECK-NEXT: .LC{{[0-9]+}}: +; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}} diff --git a/test/CodeGen/PowerPC/ppc64-zext.ll b/test/CodeGen/PowerPC/ppc64-zext.ll new file mode 100644 index 0000000..eb55445 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-zext.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux" + +define i64 @fun(i32 %arg32) nounwind { +entry: +; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 + %o = zext i32 %arg32 to i64 + ret i64 %o +} + diff --git a/test/CodeGen/PowerPC/pr12757.ll b/test/CodeGen/PowerPC/pr12757.ll new file mode 100644 index 0000000..c344656 --- /dev/null +++ b/test/CodeGen/PowerPC/pr12757.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @__flt_rounds() nounwind { +entry: + %0 = tail call i64 asm sideeffect "mffs $0", "=f"() nounwind + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK: @__flt_rounds +; CHECK: mffs + diff --git a/test/CodeGen/PowerPC/pr13641.ll 
b/test/CodeGen/PowerPC/pr13641.ll new file mode 100644 index 0000000..c4d3f3a --- /dev/null +++ b/test/CodeGen/PowerPC/pr13641.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @foo() nounwind { + ret void +} + +; CHECK: blr +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .quad 0 diff --git a/test/CodeGen/PowerPC/pr13891.ll b/test/CodeGen/PowerPC/pr13891.ll new file mode 100644 index 0000000..3ae7385 --- /dev/null +++ b/test/CodeGen/PowerPC/pr13891.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.foo = type { i8, i8 } + +define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noinline { +; CHECK: _Z5check3foos: +; CHECK: sth 3, {{[0-9]+}}(1) +; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1) +entry: + %0 = bitcast %struct.foo* %f to i16* + %1 = load i16* %0, align 2 + %bf.val.sext = ashr i16 %1, 8 + %cmp = icmp eq i16 %bf.val.sext, %i + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %conv = sext i16 %bf.val.sext to i32 + tail call void @exit(i32 %conv) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +declare void @exit(i32) diff --git a/test/CodeGen/PowerPC/remat-imm.ll b/test/CodeGen/PowerPC/remat-imm.ll new file mode 100644 index 0000000..520921f --- /dev/null +++ b/test/CodeGen/PowerPC/remat-imm.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" + +@.str = private unnamed_addr constant [6 x i8] c"%d,%d\00", align 1 + +define i32 @main() nounwind { +entry: +; CHECK: li 4, 128 +; CHECK-NOT: mr 4, {{.*}} + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll new file mode 100644 index 0000000..884d3a8 --- /dev/null +++ b/test/CodeGen/PowerPC/structsinmem.ll @@ -0,0 +1,227 @@ +; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s + +; FIXME: The code generation for packed structs is very poor because the +; PowerPC target wrongly rejects all unaligned loads. This test case will +; need to be revised when that is fixed. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s1 = type { i8 } +%struct.s2 = type { i16 } +%struct.s4 = type { i32 } +%struct.t1 = type { i8 } +%struct.t3 = type <{ i16, i8 }> +%struct.t5 = type <{ i32, i8 }> +%struct.t6 = type <{ i32, i16 }> +%struct.t7 = type <{ i32, i16, i8 }> +%struct.s3 = type { i16, i8 } +%struct.s5 = type { i32, i8 } +%struct.s6 = type { i32, i16 } +%struct.s7 = type { i32, i16, i8 } +%struct.t2 = type <{ i16 }> +%struct.t4 = type <{ i32 }> + +@caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1 +@caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2 +@caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2 +@caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4 +@caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4 +@caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4 +@caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4 +@caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1 +@caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1 +@caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1 +@caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1 +@caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1 +@caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1 +@caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1 + +define i32 @caller1() nounwind { +entry: + %p1 = alloca %struct.s1, align 1 + %p2 = alloca %struct.s2, align 2 + %p3 = alloca %struct.s3, align 2 + %p4 = alloca %struct.s4, align 4 + %p5 = alloca %struct.s5, align 4 + %p6 = alloca %struct.s6, align 4 + %p7 = alloca %struct.s7, align 4 + %0 = bitcast %struct.s1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.s2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false) + %2 = bitcast %struct.s3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false) + %3 = bitcast %struct.s4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false) + %4 = bitcast %struct.s5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false) + %5 = bitcast %struct.s6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false) + %6 = bitcast %struct.s7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false) + %call = call i32 @callee1(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) + ret i32 %call + +; CHECK: stb 
{{[0-9]+}}, 119(1) +; CHECK: sth {{[0-9]+}}, 126(1) +; CHECK: stw {{[0-9]+}}, 132(1) +; CHECK: stw {{[0-9]+}}, 140(1) +; CHECK: std {{[0-9]+}}, 144(1) +; CHECK: std {{[0-9]+}}, 152(1) +; CHECK: std {{[0-9]+}}, 160(1) +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define internal i32 @callee1(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind { +entry: + %z1.addr = alloca i32, align 4 + %z2.addr = alloca i32, align 4 + %z3.addr = alloca i32, align 4 + %z4.addr = alloca i32, align 4 + %z5.addr = alloca i32, align 4 + %z6.addr = alloca i32, align 4 + %z7.addr = alloca i32, align 4 + %z8.addr = alloca i32, align 4 + store i32 %z1, i32* %z1.addr, align 4 + store i32 %z2, i32* %z2.addr, align 4 + store i32 %z3, i32* %z3.addr, align 4 + store i32 %z4, i32* %z4.addr, align 4 + store i32 %z5, i32* %z5.addr, align 4 + store i32 %z6, i32* %z6.addr, align 4 + store i32 %z7, i32* %z7.addr, align 4 + store i32 %z8, i32* %z8.addr, align 4 + %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 2 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 2 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 4 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 4 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 4 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 4 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: lha {{[0-9]+}}, 126(1) +; CHECK: lbz {{[0-9]+}}, 119(1) +; CHECK: lha {{[0-9]+}}, 132(1) +; CHECK: lwz {{[0-9]+}}, 140(1) +; CHECK: lwz {{[0-9]+}}, 144(1) +; CHECK: lwz {{[0-9]+}}, 152(1) +; CHECK: lwz {{[0-9]+}}, 160(1) +} + +define i32 @caller2() nounwind { +entry: + %p1 = alloca %struct.t1, align 1 + %p2 = alloca %struct.t2, align 1 + %p3 = alloca %struct.t3, align 1 + %p4 = alloca %struct.t4, align 1 + %p5 = alloca %struct.t5, align 1 + %p6 = alloca %struct.t6, align 1 + %p7 = alloca %struct.t7, align 1 + %0 = bitcast %struct.t1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.t2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false) + %2 = bitcast %struct.t3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false) + %3 = bitcast %struct.t4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false) + %4 = bitcast %struct.t5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false) + %5 = bitcast %struct.t6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* 
@caller2.p6 to i8*), i64 6, i32 1, i1 false) + %6 = bitcast %struct.t7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false) + %call = call i32 @callee2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) + ret i32 %call + +; CHECK: stb {{[0-9]+}}, 119(1) +; CHECK: sth {{[0-9]+}}, 126(1) +; CHECK: stb {{[0-9]+}}, 135(1) +; CHECK: sth {{[0-9]+}}, 133(1) +; CHECK: stw {{[0-9]+}}, 140(1) +; CHECK: stb {{[0-9]+}}, 151(1) +; CHECK: stw {{[0-9]+}}, 147(1) +; CHECK: sth {{[0-9]+}}, 158(1) +; CHECK: stw {{[0-9]+}}, 154(1) +; CHECK: stb {{[0-9]+}}, 167(1) +; CHECK: sth {{[0-9]+}}, 165(1) +; CHECK: stw {{[0-9]+}}, 161(1) +} + +define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { +entry: + %z1.addr = alloca i32, align 4 + %z2.addr = alloca i32, align 4 + %z3.addr = alloca i32, align 4 + %z4.addr = alloca i32, align 4 + %z5.addr = alloca i32, align 4 + %z6.addr = alloca i32, align 4 + %z7.addr = alloca i32, align 4 + %z8.addr = alloca i32, align 4 + store i32 %z1, i32* %z1.addr, align 4 + store i32 %z2, i32* %z2.addr, align 4 + store i32 %z3, i32* %z3.addr, align 4 + store i32 %z4, i32* %z4.addr, align 4 + store i32 %z5, i32* %z5.addr, align 4 + store i32 %z6, i32* %z6.addr, align 4 + store i32 %z7, i32* %z7.addr, align 4 + store i32 %z8, i32* %z8.addr, align 4 + %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 1 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 1 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 1 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 1 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 1 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 1 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: lbz {{[0-9]+}}, 149(1) +; CHECK: lbz {{[0-9]+}}, 150(1) +; CHECK: lbz {{[0-9]+}}, 147(1) +; CHECK: lbz {{[0-9]+}}, 148(1) +; CHECK: lbz {{[0-9]+}}, 133(1) +; CHECK: lbz {{[0-9]+}}, 134(1) +; CHECK: lha {{[0-9]+}}, 126(1) +; CHECK: lbz {{[0-9]+}}, 119(1) +; CHECK: lwz {{[0-9]+}}, 140(1) +; CHECK: lhz {{[0-9]+}}, 154(1) +; CHECK: lhz {{[0-9]+}}, 156(1) +; CHECK: lbz {{[0-9]+}}, 163(1) +; CHECK: lbz {{[0-9]+}}, 164(1) +; CHECK: lbz {{[0-9]+}}, 161(1) +; CHECK: lbz {{[0-9]+}}, 162(1) +} diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll new file mode 100644 index 0000000..ef706af --- /dev/null +++ b/test/CodeGen/PowerPC/structsinregs.ll @@ -0,0 +1,213 @@ +; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s + +; FIXME: The code generation for packed structs is very poor because the +; PowerPC target wrongly 
rejects all unaligned loads. This test case will +; need to be revised when that is fixed. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s1 = type { i8 } +%struct.s2 = type { i16 } +%struct.s4 = type { i32 } +%struct.t1 = type { i8 } +%struct.t3 = type <{ i16, i8 }> +%struct.t5 = type <{ i32, i8 }> +%struct.t6 = type <{ i32, i16 }> +%struct.t7 = type <{ i32, i16, i8 }> +%struct.s3 = type { i16, i8 } +%struct.s5 = type { i32, i8 } +%struct.s6 = type { i32, i16 } +%struct.s7 = type { i32, i16, i8 } +%struct.t2 = type <{ i16 }> +%struct.t4 = type <{ i32 }> + +@caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1 +@caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2 +@caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2 +@caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4 +@caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4 +@caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4 +@caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4 +@caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1 +@caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1 +@caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1 +@caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1 +@caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1 +@caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1 +@caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1 + +define i32 @caller1() nounwind { +entry: + %p1 = alloca %struct.s1, align 1 + %p2 = alloca %struct.s2, align 2 + %p3 = alloca %struct.s3, align 2 + %p4 = alloca %struct.s4, align 4 + %p5 = alloca %struct.s5, align 4 + %p6 = alloca %struct.s6, align 4 + %p7 = alloca %struct.s7, align 4 + %0 = bitcast %struct.s1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.s2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false) + %2 = bitcast %struct.s3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false) + %3 = bitcast %struct.s4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false) + %4 = bitcast %struct.s5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false) + %5 = bitcast %struct.s6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false) + %6 = bitcast %struct.s7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false) + %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) + ret 
i32 %call + +; CHECK: ld 9, 128(31) +; CHECK: ld 8, 136(31) +; CHECK: ld 7, 144(31) +; CHECK: lwz 6, 152(31) +; CHECK: lwz 5, 160(31) +; CHECK: lhz 4, 168(31) +; CHECK: lbz 3, 176(31) +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define internal i32 @callee1(%struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind { +entry: + %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 2 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 2 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 4 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 4 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 4 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 4 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: std 9, 96(1) +; CHECK: std 8, 88(1) +; CHECK: std 7, 80(1) +; CHECK: stw 6, 76(1) +; CHECK: stw 5, 68(1) +; CHECK: sth 4, 62(1) +; CHECK: stb 3, 55(1) +; CHECK: lha {{[0-9]+}}, 62(1) +; CHECK: lbz {{[0-9]+}}, 55(1) +; CHECK: lha {{[0-9]+}}, 68(1) +; CHECK: lwz {{[0-9]+}}, 76(1) +; CHECK: lwz {{[0-9]+}}, 80(1) +; CHECK: lwz {{[0-9]+}}, 88(1) +; CHECK: lwz {{[0-9]+}}, 96(1) +} + +define i32 @caller2() nounwind { +entry: + %p1 = alloca %struct.t1, align 1 + %p2 = alloca %struct.t2, align 1 + %p3 = alloca %struct.t3, align 1 + %p4 = alloca %struct.t4, align 1 + %p5 = alloca %struct.t5, align 1 + %p6 = alloca %struct.t6, align 1 + %p7 = alloca %struct.t7, align 1 + %0 = bitcast %struct.t1* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false) + %1 = bitcast %struct.t2* %p2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false) + %2 = bitcast %struct.t3* %p3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false) + %3 = bitcast %struct.t4* %p4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false) + %4 = bitcast %struct.t5* %p5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false) + %5 = bitcast %struct.t6* %p6 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* @caller2.p6 to i8*), i64 6, i32 1, i1 false) + %6 = bitcast %struct.t7* %p7 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false) + %call = call i32 @callee2(%struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) + ret i32 %call + +; CHECK: stb {{[0-9]+}}, 71(1) +; CHECK: sth {{[0-9]+}}, 69(1) +; CHECK: stb {{[0-9]+}}, 87(1) +; CHECK: stw {{[0-9]+}}, 83(1) 
+; CHECK: sth {{[0-9]+}}, 94(1) +; CHECK: stw {{[0-9]+}}, 90(1) +; CHECK: stb {{[0-9]+}}, 103(1) +; CHECK: sth {{[0-9]+}}, 101(1) +; CHECK: stw {{[0-9]+}}, 97(1) +; CHECK: ld 9, 96(1) +; CHECK: ld 8, 88(1) +; CHECK: ld 7, 80(1) +; CHECK: lwz 6, 152(31) +; CHECK: ld 5, 64(1) +; CHECK: lhz 4, 168(31) +; CHECK: lbz 3, 176(31) +} + +define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { +entry: + %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0 + %0 = load i8* %a, align 1 + %conv = zext i8 %0 to i32 + %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0 + %1 = load i16* %a1, align 1 + %conv2 = sext i16 %1 to i32 + %add = add nsw i32 %conv, %conv2 + %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0 + %2 = load i16* %a3, align 1 + %conv4 = sext i16 %2 to i32 + %add5 = add nsw i32 %add, %conv4 + %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0 + %3 = load i32* %a6, align 1 + %add7 = add nsw i32 %add5, %3 + %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0 + %4 = load i32* %a8, align 1 + %add9 = add nsw i32 %add7, %4 + %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0 + %5 = load i32* %a10, align 1 + %add11 = add nsw i32 %add9, %5 + %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0 + %6 = load i32* %a12, align 1 + %add13 = add nsw i32 %add11, %6 + ret i32 %add13 + +; CHECK: std 9, 96(1) +; CHECK: std 8, 88(1) +; CHECK: std 7, 80(1) +; CHECK: stw 6, 76(1) +; CHECK: std 5, 64(1) +; CHECK: sth 4, 62(1) +; CHECK: stb 3, 55(1) +; CHECK: lbz {{[0-9]+}}, 85(1) +; CHECK: lbz {{[0-9]+}}, 86(1) +; CHECK: lbz {{[0-9]+}}, 83(1) +; CHECK: lbz {{[0-9]+}}, 84(1) +; CHECK: lbz {{[0-9]+}}, 69(1) +; CHECK: lbz {{[0-9]+}}, 70(1) +; CHECK: lha {{[0-9]+}}, 62(1) +; CHECK: lbz {{[0-9]+}}, 55(1) +; CHECK: lwz {{[0-9]+}}, 76(1) +; CHECK: lhz {{[0-9]+}}, 90(1) +; CHECK: lhz {{[0-9]+}}, 92(1) +; CHECK: lbz {{[0-9]+}}, 99(1) +; CHECK: lbz {{[0-9]+}}, 100(1) +; CHECK: lbz {{[0-9]+}}, 97(1) +; CHECK: lbz {{[0-9]+}}, 98(1) +} diff --git a/test/CodeGen/PowerPC/varargs-struct-float.ll b/test/CodeGen/PowerPC/varargs-struct-float.ll new file mode 100644 index 0000000..fb1835f --- /dev/null +++ b/test/CodeGen/PowerPC/varargs-struct-float.ll @@ -0,0 +1,23 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.Sf1 = type { float } + +define void @foo(float inreg %s.coerce) nounwind { +entry: + %s = alloca %struct.Sf1, align 4 + %coerce.dive = getelementptr %struct.Sf1* %s, i32 0, i32 0 + store float %s.coerce, float* %coerce.dive, align 1 + %coerce.dive1 = getelementptr %struct.Sf1* %s, i32 0, i32 0 + %0 = load float* %coerce.dive1, align 1 + call void (i32, ...)* @testvaSf1(i32 1, float inreg %0) + ret void +} + +; CHECK: stfs {{[0-9]+}}, 60(1) +; CHECK: ld 4, 56(1) +; CHECK: bl + +declare void @testvaSf1(i32, ...) diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll new file mode 100644 index 0000000..3180f46 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -0,0 +1,527 @@ +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s + +; Check vector comparisons using AltiVec. For non-native types, only a basic +; comparison-instruction check is done.
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <2 x i8> @v2si8_cmp(<2 x i8> %x, <2 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <2 x i8> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i8>
+  ret <2 x i8> %sext
+}
+; CHECK: v2si8_cmp:
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <4 x i8> @v4si8_cmp(<4 x i8> %x, <4 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <4 x i8> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  ret <4 x i8> %sext
+}
+; CHECK: v4si8_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <8 x i8> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i8>
+  ret <8 x i8> %sext
+}
+; CHECK: v8si8_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v16i8 since it is an altivec native type
+
+define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_eq:
+; CHECK: vcmpequb 2, 2, 3
+
+define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_ne:
+; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_le:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_le:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_lt:
+; CHECK: vcmpgtsb 2, 3, 2
+
+define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_lt:
+; CHECK: vcmpgtub 2, 3, 2
+
+define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_gt:
+; CHECK: vcmpgtsb 2, 2, 3
+
+define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_gt:
+; CHECK: vcmpgtub 2, 2, 3
+
+define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_ge:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_ge:
+; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+
+define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
+  %cmp = icmp eq <32 x i8> %x, %y
+  %sext = sext <32 x i1> %cmp to <32 x i8>
+  ret <32 x i8> %sext
+}
+; CHECK: v32si8_cmp:
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <2 x i16> @v2si16_cmp(<2 x i16> %x, <2 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <2 x i16> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  ret <2 x i16> %sext
+}
+; CHECK: v2si16_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <4 x i16> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i16>
+  ret <4 x i16> %sext
+}
+; CHECK: v4si16_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v8i16 since it is an altivec native type
+
+define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp eq <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_eq:
+; CHECK: vcmpequh 2, 2, 3
+
+define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_ne:
+; CHECK: vcmpequh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_le:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_le:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_lt:
+; CHECK: vcmpgtsh 2, 3, 2
+
+define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_lt:
+; CHECK: vcmpgtuh 2, 3, 2
+
+define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_gt:
+; CHECK: vcmpgtsh 2, 2, 3
+
+define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_gt:
+; CHECK: vcmpgtuh 2, 2, 3
+
+define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_ge:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_ge:
+; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+
+define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <16 x i16> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i16>
+  ret <16 x i16> %sext
+}
+; CHECK: v16si16_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <32 x i16> @v32si16_cmp(<32 x i16> %x, <32 x i16> %y) nounwind readnone {
+  %cmp = icmp eq <32 x i16> %x, %y
+  %sext = sext <32 x i1> %cmp to <32 x i16>
+  ret <32 x i16> %sext
+}
+; CHECK: v32si16_cmp:
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <2 x i32> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %sext
+}
+; CHECK: v2si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v4si32 since it is an altivec native type
+
+define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp eq <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_eq:
+; CHECK: vcmpequw 2, 2, 3
+
+define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_ne:
+; CHECK: vcmpequw [[RCMP:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RCMP]], [[RCMP]]
+
+define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_le:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_le:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_lt:
+; CHECK: vcmpgtsw 2, 3, 2
+
+define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_lt:
+; CHECK: vcmpgtuw 2, 3, 2
+
+define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_gt:
+; CHECK: vcmpgtsw 2, 2, 3
+
+define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_gt:
+; CHECK: vcmpgtuw 2, 2, 3
+
+define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_ge:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_ge:
+; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+
+
+define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <8 x i32> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i32>
+  ret <8 x i32> %sext
+}
+; CHECK: v8si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <16 x i32> @v16si32_cmp(<16 x i32> %x, <16 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <16 x i32> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i32>
+  ret <16 x i32> %sext
+}
+; CHECK: v16si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <32 x i32> @v32si32_cmp(<32 x i32> %x, <32 x i32> %y) nounwind readnone {
+  %cmp = icmp eq <32 x i32> %x, %y
+  %sext = sext <32 x i1> %cmp to <32 x i32>
+  ret <32 x i32> %sext
+}
+; CHECK: v32si32_cmp:
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+define <2 x float> @v2f32_cmp(<2 x float> %x, <2 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp oeq <2 x float> %x, %y
+  %sext = sext <2 x i1> %cmp to <2 x i32>
+  %0 = bitcast <2 x i32> %sext to <2 x float>
+  ret <2 x float> %0
+}
+; CHECK: v2f32_cmp:
+; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+
+; Additional tests for v4f32 since it is an altivec native type
+
+define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp oeq <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK: v4f32_cmp_eq:
+; CHECK: vcmpeqfp 2, 2, 3
+
+define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp une <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+ 
ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ne: +; CHECK: vcmpeqfp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ole <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_le: +; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp olt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_lt: +; CHECK: vcmpgtfp 2, 3, 2 + +define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oge <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ge: +; CHECK: vcmpgefp 2, 2, 3 + +define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ogt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_gt: +; CHECK: vcmpgtfp 2, 2, 3 + + +define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oeq <8 x float> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i32> + %0 = bitcast <8 x i32> %sext to <8 x float> + ret <8 x float> %0 +} +; CHECK: v8f32_cmp: +; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} diff --git a/test/CodeGen/PowerPC/vec_conv.ll b/test/CodeGen/PowerPC/vec_conv.ll new file mode 100644 index 0000000..a475e94 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_conv.ll @@ -0,0 +1,57 @@ +; RUN: llc -mattr=+altivec < %s | FileCheck %s + +; Check vector float/int conversion using altivec. 
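+;
+; A minimal reference for the expected mapping (read off the CHECK lines
+; below rather than from any ISA manual, so treat it as a sketch):
+;   fptosi <4 x float> -> vctsxs   (float to signed i32)
+;   fptoui <4 x float> -> vctuxs   (float to unsigned i32)
+;   sitofp <4 x i32>   -> vcfsx    (signed i32 to float)
+;   uitofp <4 x i32>   -> vcfux    (unsigned i32 to float)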
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@cte_float = global <4 x float> <float 6.5e+00, float 6.5e+00, float 6.5e+00, float 6.5e+00>, align 16
+@cte_int = global <4 x i32> <i32 6, i32 6, i32 6, i32 6>, align 16
+
+
+define void @v4f32_to_v4i32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x float>* @cte_float, align 16
+  %mul = fmul <4 x float> %0, %x
+  %1 = fptosi <4 x float> %mul to <4 x i32>
+  store <4 x i32> %1, <4 x i32>* %y, align 16
+  ret void
+}
+;CHECK: v4f32_to_v4i32:
+;CHECK: vctsxs {{[0-9]+}}, {{[0-9]+}}, 0
+
+
+define void @v4f32_to_v4u32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x float>* @cte_float, align 16
+  %mul = fmul <4 x float> %0, %x
+  %1 = fptoui <4 x float> %mul to <4 x i32>
+  store <4 x i32> %1, <4 x i32>* %y, align 16
+  ret void
+}
+;CHECK: v4f32_to_v4u32:
+;CHECK: vctuxs {{[0-9]+}}, {{[0-9]+}}, 0
+
+
+define void @v4i32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x i32>* @cte_int, align 16
+  %mul = mul <4 x i32> %0, %x
+  %1 = sitofp <4 x i32> %mul to <4 x float>
+  store <4 x float> %1, <4 x float>* %y, align 16
+  ret void
+}
+;CHECK: v4i32_to_v4f32:
+;CHECK: vcfsx {{[0-9]+}}, {{[0-9]+}}, 0
+
+
+define void @v4u32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
+entry:
+  %0 = load <4 x i32>* @cte_int, align 16
+  %mul = mul <4 x i32> %0, %x
+  %1 = uitofp <4 x i32> %mul to <4 x float>
+  store <4 x float> %1, <4 x float>* %y, align 16
+  ret void
+}
+;CHECK: v4u32_to_v4f32:
+;CHECK: vcfux {{[0-9]+}}, {{[0-9]+}}, 0
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
new file mode 100644
index 0000000..201c15b
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -0,0 +1,155 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check vector extend load expansion with altivec enabled.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Altivec does not provide a sext instruction, so sign extension is expanded
+; into a sequence of vector stores (stvx), per-element loads, sign extensions,
+; and stores (lbz/stb), and a final vector load (lvx) of the extended result.
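+;
+; Roughly, the per-element shape expected by the checks below is (a sketch
+; assembled from those checks; register numbers and offsets are illustrative
+; only, and the in-GPR extension step is not checked explicitly):
+;   stvx 2, 0, rN     ; spill the vector to the stack
+;   lbz  rT, off(1)   ; load one element
+;   ...               ; sign-extend the low bits in a GPR
+;   stb  rT, off(1)   ; store it back
+;   ...               ; repeated for each of the 16 elements
+;   lvx  2, 0, rN     ; reload the fully extended vector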
+define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
+  %b = trunc <16 x i8> %a to <16 x i4>
+  %c = sext <16 x i4> %b to <16 x i8>
+  ret <16 x i8> %c
+}
+; CHECK: v16si8_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; The zero extend uses cleverer logic: a vector splat and a logical AND to
+; set the high bits to 0.
+define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) {
+  %b = trunc <16 x i8> %a to <16 x i4>
+  %c = zext <16 x i4> %b to <16 x i8>
+  ret <16 x i8> %c
+}
+; CHECK: v16si8_zext_in_reg:
+; CHECK: vspltisb [[VMASK:[0-9]+]], 15
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
+
+; Same as v16si8_sext_in_reg, but expands to halfword loads/stores (lhz/sth).
+define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
+  %b = trunc <8 x i16> %a to <8 x i8>
+  %c = sext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %c
+}
+; CHECK: v8si16_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; Same idea as v16si8_zext_in_reg, but instead of creating the mask with a
+; splat, it is loaded from memory.
+define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) {
+  %b = trunc <8 x i16> %a to <8 x i8>
+  %c = zext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %c
+}
+; CHECK: v8si16_zext_in_reg:
+; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
+; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
+
+; Same as v16si8_sext_in_reg, but expands to halfword loads (lha) and word
+; stores (stw).
+define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
+  %b = trunc <4 x i32> %a to <4 x i16>
+  %c = sext <4 x i16> %b to <4 x i32>
+  ret <4 x i32> %c
+}
+; CHECK: v4si32_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; Same idea as v16si8_zext_in_reg: the mask is built with splats
+; (vspltisw/vsrw) and applied with vand.
+define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
+  %b = trunc <4 x i32> %a to <4 x i16>
+  %c = zext <4 x i16> %b to <4 x i32>
+  ret <4 x i32> %c
+}
+; CHECK: v4si32_zext_in_reg:
+; CHECK: vspltisw [[VMASK:[0-9]+]], -16
+; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
diff --git a/test/CodeGen/PowerPC/vec_sqrt.ll b/test/CodeGen/PowerPC/vec_sqrt.ll
new file mode 100644
index 0000000..055da1a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_sqrt.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec,+fsqrt < %s | FileCheck %s
+
+; Check that vector sqrt is expanded into scalar floating-point sqrts, since
+; altivec does not provide a vector fsqrt instruction.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %val)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float> %val)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double> %val)
+
+define <2 x float> @v2f32_sqrt(<2 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <2 x float> @llvm.sqrt.v2f32 (<2 x float> %x)
+  ret <2 x float> %sqrt
+}
+; sqrt (<2 x float>) is promoted to sqrt (<4 x float>)
+; CHECK: v2f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_sqrt(<4 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %x)
+  ret <4 x float> %sqrt
+}
+; CHECK: v4f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_sqrt(<8 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %x)
+  ret <8 x float> %sqrt
+}
+; CHECK: v8f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_sqrt(<2 x double> %x) nounwind readnone {
+entry:
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %x)
+  ret <2 x double> %sqrt
+}
+; CHECK: v2f64_sqrt:
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_sqrt(<4 x double> %x) nounwind readnone {
+entry:
+  %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %x)
+  ret <4 x double> %sqrt
+}
+; CHECK: v4f64_sqrt:
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
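+
+; To reproduce these checks by hand, one can mirror the RUN line above
+; (a hypothetical invocation; any llc/FileCheck built from this tree should do):
+;   llc -mcpu=pwr6 -mattr=+altivec,+fsqrt < vec_sqrt.ll | FileCheck vec_sqrt.ll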
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
new file mode 100644
index 0000000..7641017
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+
+; This verifies that we generate correct spill/reload code for vector regs.
+
+define void @addrtaken(i32 %i, <4 x float> %w) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  %w.addr = alloca <4 x float>, align 16
+  store i32 %i, i32* %i.addr, align 4
+  store <4 x float> %w, <4 x float>* %w.addr, align 16
+  call void @foo(i32* %i.addr)
+  ret void
+}
+
+; CHECK: stvx 2, 0, 0
+; CHECK: lvx 2, 0, 0
+
+declare void @foo(i32*)