Diffstat (limited to 'lib/Target/CellSPU')
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/CMakeLists.txt  |   9
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/Makefile        |  17
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt             |   6
-rw-r--r--  lib/Target/CellSPU/Makefile                   |   2
-rw-r--r--  lib/Target/CellSPU/README.txt                 |   2
-rw-r--r--  lib/Target/CellSPU/SPU.h                      |   1
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td          |  79
-rw-r--r--  lib/Target/CellSPU/SPUAsmPrinter.cpp (renamed from lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp) |  37
-rw-r--r--  lib/Target/CellSPU/SPUFrameInfo.cpp           |  29
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.cpp       | 276
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.h (renamed from lib/Target/CellSPU/SPUFrameInfo.h) |  35
-rw-r--r--  lib/Target/CellSPU/SPUHazardRecognizers.cpp   |   4
-rw-r--r--  lib/Target/CellSPU/SPUHazardRecognizers.h     |   4
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp        | 223
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp        | 787
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h          |  23
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp           |  15
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h             |   4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td            | 396
-rw-r--r--  lib/Target/CellSPU/SPUMCAsmInfo.cpp           |   3
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td                |  18
-rw-r--r--  lib/Target/CellSPU/SPUNopFiller.cpp           | 153
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td             |  18
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp        | 264
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h          |  16
-rw-r--r--  lib/Target/CellSPU/SPUSchedule.td             |   8
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.cpp           |  21
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.h             |   6
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp       |  13
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h         |  15
30 files changed, 1367 insertions(+), 1117 deletions(-)
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 8a2b59a..0000000
--- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMCellSPUAsmPrinter
- SPUAsmPrinter.cpp
- )
-add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile
deleted file mode 100644
index 4ec9d04..0000000
--- a/lib/Target/CellSPU/AsmPrinter/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-##===- lib/Target/CellSPU/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUAsmPrinter
-
-# Hack: we need to include 'main' CellSPU target directory to grab
-# private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index ddfca37..a2a2ef1 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -12,16 +12,18 @@ tablegen(SPUGenSubtarget.inc -gen-subtarget)
tablegen(SPUGenCallingConv.inc -gen-callingconv)
add_llvm_target(CellSPUCodeGen
- SPUFrameInfo.cpp
+ SPUAsmPrinter.cpp
SPUHazardRecognizers.cpp
SPUInstrInfo.cpp
SPUISelDAGToDAG.cpp
SPUISelLowering.cpp
+ SPUFrameLowering.cpp
SPUMCAsmInfo.cpp
SPURegisterInfo.cpp
SPUSubtarget.cpp
SPUTargetMachine.cpp
SPUSelectionDAGInfo.cpp
+ SPUNopFiller.cpp
)
-target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
index cbdbd3c..77c66be 100644
--- a/lib/Target/CellSPU/Makefile
+++ b/lib/Target/CellSPU/Makefile
@@ -16,6 +16,6 @@ BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
SPUGenInstrInfo.inc SPUGenDAGISel.inc \
SPUGenSubtarget.inc SPUGenCallingConv.inc
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 0e7ad35..3e7e0b6 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -55,7 +55,7 @@ TODO:
* i128 support:
* zero extension, any extension: done
- * sign extension: needed
+ * sign extension: done
* arithmetic operators (add, sub, mul, div): needed
* logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 1f21511..72f8430 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -23,6 +23,7 @@ namespace llvm {
class formatted_raw_ostream;
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
extern Target TheCellSPUTarget;
}
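
createSPUNopFillerPass pairs with the new SPUNopFiller.cpp listed in the
diffstat. Where the pass is scheduled is not visible in this excerpt; below is
a minimal sketch assuming the usual pre-emit hook in SPUTargetMachine (which
this commit also touches) -- the hook name is an assumption, not taken from
the diff:

    // Hypothetical wiring: run the NOP filler late, once instruction
    // placement is final, so the slots it fills stay put.
    bool SPUTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel) {
      PM.add(createSPUNopFillerPass(*this));
      return true;
    }
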
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 069a182..5ef5716 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -54,8 +54,8 @@ class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
def CEQr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB))), 0xb)>;
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
// The i64 seteq fragment that does the vector comparison
def CEQv2i64compare:
@@ -67,12 +67,14 @@ def CEQv2i64compare:
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
- def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
}
defm I64EQ: CompareEqual64;
@@ -89,10 +91,12 @@ def : I64SELECTNegCond<setne, I64EQr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CLGTr64ugt:
- CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CLGTr64eq:
- CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CLGTr64compare:
CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
@@ -112,12 +116,14 @@ def CLGTv2i64compare:
multiclass CompareLogicalGreaterThan64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
}
defm I64LGT: CompareLogicalGreaterThan64;
@@ -144,12 +150,14 @@ def CLGEv2i64compare:
multiclass CompareLogicalGreaterEqual64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
}
defm I64LGE: CompareLogicalGreaterEqual64;
@@ -168,10 +176,12 @@ def : I64SELECTNegCond<setult, I64LGEr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CGTr64sgt:
- CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CGTr64eq:
- CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CGTr64compare:
CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
@@ -191,12 +201,14 @@ def CGTv2i64compare:
multiclass CompareGreaterThan64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
}
defm I64GT: CompareLogicalGreaterThan64;
@@ -223,12 +235,12 @@ def CGEv2i64compare:
multiclass CompareGreaterEqual64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
}
defm I64GE: CompareGreaterEqual64;
@@ -255,9 +267,9 @@ class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment)>;
+ (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
@@ -275,11 +287,12 @@ class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB),
- v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment)>;
+ (COPY_TO_REGCLASS
+ v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
@@ -374,9 +387,9 @@ class v2i64_mul<dag rA, dag rB, dag rCGmask>:
rCGmask>;
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment)>;
+ (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
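
These patterns implement 64-bit comparisons with 32-bit hardware: CEQv4i32
compares word-by-word, GBv4i32 gathers one result bit per word (word 0 as the
most significant of the four, which is what the 0xb threshold implies), and
the final CGTIv4i32 against 0xb accepts exactly the gathered masks whose top
two bits are set -- i.e. both words of the preferred doubleword matched --
regardless of the two undefined slots. A runnable scalar model of the seteq
case (helper name hypothetical):

    #include <cassert>
    #include <cstdint>

    // Models CGTIv4i32(GBv4i32(CEQv4i32(a, b)), 0xb) for an i64 held in
    // vector word slots 0-1. Any 4-bit mask with bits 3 and 2 set exceeds
    // 0xb; any mask without both set does not, so slots 2-3 are don't-cares.
    static bool ceq64_via_v4i32(uint64_t a, uint64_t b) {
      unsigned gb = 0;                                // gathered CEQ bits
      if (uint32_t(a >> 32) == uint32_t(b >> 32)) gb |= 1u << 3;  // slot 0
      if (uint32_t(a)       == uint32_t(b))       gb |= 1u << 2;  // slot 1
      return gb > 0xb;                                // CGTI ..., 0xb
    }

    int main() {
      assert( ceq64_via_v4i32(42, 42));
      assert(!ceq64_via_v4i32(42, 43));
      assert(!ceq64_via_v4i32(1ULL << 32, 1));
      return 0;
    }
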
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 3e95531..4040461 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -46,10 +46,6 @@ namespace {
return "STI CBEA SPU Assembly Printer";
}
- SPUTargetMachine &getTM() {
- return static_cast<SPUTargetMachine&>(TM);
- }
-
/// printInstruction - This method is automatically generated by tablegen
/// from the instruction set description.
void printInstruction(const MachineInstr *MI, raw_ostream &OS);
@@ -64,15 +60,6 @@ namespace {
}
void printOp(const MachineOperand &MO, raw_ostream &OS);
- /// printRegister - Print register according to target requirements.
- ///
- void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){
- unsigned RegNo = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
- "Not physreg??");
- O << getRegisterName(RegNo);
- }
-
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
@@ -93,17 +80,6 @@ namespace {
void
- printS7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- int value = MI->getOperand(OpNo).getImm();
- value = (value << (32 - 7)) >> (32 - 7);
-
- assert((value >= -(1 << 8) && value <= (1 << 7) - 1)
- && "Invalid s7 argument");
- O << value;
- }
-
- void
printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
{
unsigned int value = MI->getOperand(OpNo).getImm();
@@ -134,12 +110,6 @@ namespace {
}
void
- printU32ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (unsigned)MI->getOperand(OpNo).getImm();
- }
-
- void
printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
// the value contained in the register. For this reason, the darwin
@@ -221,13 +191,6 @@ namespace {
printOp(MI->getOperand(OpNo), O);
}
- void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // HBR operands are generated in front of branches, hence, the
- // program counter plus the target.
- O << ".+";
- printOp(MI->getOperand(OpNo), O);
- }
-
void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
if (MI->getOperand(OpNo).isImm()) {
printS16ImmOperand(MI, OpNo, O);
diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp
deleted file mode 100644
index 60d7ba7..0000000
--- a/lib/Target/CellSPU/SPUFrameInfo.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUFrameInfo.h"
-#include "SPURegisterNames.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// SPUFrameInfo:
-//===----------------------------------------------------------------------===//
-
-SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
- TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
- TM(tm)
-{
- LR[0].first = SPU::R0;
- LR[0].second = 16;
-}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 0000000..432f4a1
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,276 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameLowering.h"
+#include "SPURegisterNames.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return MFI->getStackSize() &&
+ (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI.hasDebugInfo();
+ MCSymbol *FrameLabel = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isInt<10>(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+      // Adjust $sp by required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (isInt<16>(FrameSize)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ } else {
+ // This is a leaf function -- insert a branch hint iff there are
+ // sufficient number instructions in the basic block. Note that
+    // a sufficient number of instructions in the basic block. Note that
+ if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ dl = MBBI->getDebugLoc();
+
+ // Insert terminator label
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
+ .addSym(MMI.getContext().CreateTempSymbol());
+ }
+ }
+}
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We do this to slightly improve dual issue -- not by much, but it
+ // is an opportunity for dual issue.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
+
+void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
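
The prologue rounds the frame up to the 16-byte stack alignment, then picks
the store sequence by immediate range: the d-form STQDr32/AIr32 pair when the
negated adjustment fits a 10-bit signed immediate, otherwise the longer ILr32
sequence through $r2. A minimal, runnable sketch of just that arithmetic; the
minStackSize() value of 16 is an assumption read off the "empty frame size is
16" comment above:

    #include <cstdint>
    #include <cstdio>

    // Mirrors llvm::isInt<N>(x): does x fit an N-bit signed immediate?
    static bool fitsSigned(unsigned N, int64_t x) {
      return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
    }

    int main() {
      // determineFrameLayout: round up to the 16-byte alignment.
      unsigned FrameSize = 40, AlignMask = 16 - 1;
      FrameSize = (FrameSize + AlignMask) & ~AlignMask;  // 40 -> 48
      // emitPrologue: negate (plus assumed minStackSize() of 16), then
      // choose the store form by immediate range, as the code above does.
      int64_t Adjust = -(int64_t(FrameSize) + 16);       // -64
      if (fitsSigned(10, Adjust))
        printf("STQDr32/AIr32 path, imm = %lld\n", (long long)Adjust);
      else if (fitsSigned(16, Adjust))
        printf("ILr32 path via $r2\n");
      return 0;
    }
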
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameLowering.h
index f511acd..4fee72d 100644
--- a/lib/Target/CellSPU/SPUFrameInfo.h
+++ b/lib/Target/CellSPU/SPUFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==//
+//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========//
//
// The LLVM Compiler Infrastructure
//
@@ -12,19 +12,39 @@
//
//===----------------------------------------------------------------------===//
-#if !defined(SPUFRAMEINFO_H)
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class SPUFrameInfo: public TargetFrameInfo {
- const TargetMachine &TM;
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
std::pair<unsigned, int> LR[1];
public:
- SPUFrameInfo(const TargetMachine &tm);
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+    //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+    //! Predicate: Target has a dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//! Return a function's saved spill slots
/*!
@@ -71,5 +91,4 @@ namespace llvm {
};
}
-#define SPUFRAMEINFO_H 1
#endif
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
index 9dbab1d..403d7ef 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -41,12 +41,14 @@ SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
///
/// \return NoHazard
ScheduleHazardRecognizer::HazardType
-SPUHazardRecognizer::getHazardType(SUnit *SU)
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
{
// Initial thoughts on how to do this, but this code cannot work unless the
// function's prolog and epilog code are also being scheduled so that we can
// accurately determine which pipeline is being scheduled.
#if 0
+ assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
ScheduleHazardRecognizer::HazardType retval = NoHazard;
bool mustBeOdd = false;
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
index d0ae2d8..675632c 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -20,7 +20,7 @@
namespace llvm {
class TargetInstrInfo;
-
+
/// SPUHazardRecognizer
class SPUHazardRecognizer : public ScheduleHazardRecognizer
{
@@ -30,7 +30,7 @@ private:
public:
SPUHazardRecognizer(const TargetInstrInfo &TII);
- virtual HazardType getHazardType(SUnit *SU);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
virtual void EmitNoop();
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 2f15984..d226156 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -15,7 +15,7 @@
#include "SPU.h"
#include "SPUTargetMachine.h"
#include "SPUHazardRecognizers.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "SPURegisterNames.h"
#include "SPUTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -111,55 +111,6 @@ namespace {
return false;
}
- //===------------------------------------------------------------------===//
- //! EVT to "useful stuff" mapping structure:
-
- struct valtype_map_s {
- EVT VT;
- unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
- bool ldresult_imm; /// LDRESULT instruction requires immediate?
- unsigned lrinst; /// LR instruction
- };
-
- const valtype_map_s valtype_map[] = {
- { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
- { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
- { MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
- { MVT::i64, SPU::ORr64, false, SPU::LRr64 },
- { MVT::f32, SPU::ORf32, false, SPU::LRf32 },
- { MVT::f64, SPU::ORf64, false, SPU::LRf64 },
- // vector types... (sigh!)
- { MVT::v16i8, 0, false, SPU::LRv16i8 },
- { MVT::v8i16, 0, false, SPU::LRv8i16 },
- { MVT::v4i32, 0, false, SPU::LRv4i32 },
- { MVT::v2i64, 0, false, SPU::LRv2i64 },
- { MVT::v4f32, 0, false, SPU::LRv4f32 },
- { MVT::v2f64, 0, false, SPU::LRv2f64 }
- };
-
- const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
- const valtype_map_s *getValueTypeMapEntry(EVT VT)
- {
- const valtype_map_s *retval = 0;
- for (size_t i = 0; i < n_valtype_map; ++i) {
- if (valtype_map[i].VT == VT) {
- retval = valtype_map + i;
- break;
- }
- }
-
-
-#ifndef NDEBUG
- if (retval == 0) {
- report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns"
- "NULL for " + Twine(VT.getEVTString()));
- }
-#endif
-
- return retval;
- }
-
//! Generate the carry-generate shuffle mask.
SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
SmallVector<SDValue, 16 > ShufBytes;
@@ -221,16 +172,10 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
- /// getI64Imm - Return a target constant with the specified value, of type
- /// i64.
- inline SDValue getI64Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
- }
-
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
+ }
SDNode *emitBuildVector(SDNode *bvNode) {
EVT vecVT = bvNode->getValueType(0);
@@ -268,10 +213,10 @@ namespace {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
-
+
HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
CurDAG->getEntryNode(), CGPoolOffset,
- PseudoSourceValue::getConstantPool(),0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment));
CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
@@ -356,13 +301,8 @@ namespace {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
}
- /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
- /// this target when scheduling the DAG.
- virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
- const TargetInstrInfo *II = TM.getInstrInfo();
- assert(II && "No InstrInfo?");
- return new SPUHazardRecognizer(*II);
- }
+ private:
+ SDValue getRC( MVT );
// Include the pieces autogenerated from the target description.
#include "SPUGenDAGISel.inc"
@@ -450,8 +390,8 @@ bool
SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
return DFormAddressPredicate(Op, N, Base, Index,
- SPUFrameInfo::minFrameOffset(),
- SPUFrameInfo::maxFrameOffset());
+ SPUFrameLowering::minFrameOffset(),
+ SPUFrameLowering::maxFrameOffset());
}
bool
@@ -467,7 +407,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
int FI = int(FIN->getIndex());
DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
<< FI << "\n");
- if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(0, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
return true;
@@ -493,7 +433,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
- if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(offset, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
return true;
@@ -514,7 +454,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
- if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(offset, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
return true;
@@ -564,8 +504,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
Base = CurDAG->getTargetConstant(0, N.getValueType());
Index = N;
return true;
- } else if (Opc == ISD::Register
- ||Opc == ISD::CopyFromReg
+ } else if (Opc == ISD::Register
+ ||Opc == ISD::CopyFromReg
||Opc == ISD::UNDEF
||Opc == ISD::Constant) {
unsigned OpOpc = Op->getOpcode();
@@ -625,6 +565,46 @@ SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
return false;
}
+/*!
+ Utility function to use with COPY_TO_REGCLASS instructions. Returns an
+ SDValue to be used as the last parameter of a
+ CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) function call.
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+ switch( VT.SimpleTy ) {
+ case MVT::i8:
+ return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i16:
+ return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i32:
+ return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::f32:
+ return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i64:
+ return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i128:
+ return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+ break;
+ default:
+ assert( false && "add a new case here" );
+ }
+ return SDValue();
+}
+
//! Convert the operand from a target-independent to a target-specific node
/*!
*/
@@ -632,7 +612,7 @@ SDNode *
SPUDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = N->getOpcode();
int n_ops = -1;
- unsigned NewOpc;
+ unsigned NewOpc = 0;
EVT OpVT = N->getValueType(0);
SDValue Ops[8];
DebugLoc dl = N->getDebugLoc();
@@ -654,7 +634,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
NewOpc = SPU::Ar32;
Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
- N->getValueType(0), TFI, Imm0),
+ N->getValueType(0), TFI),
0);
n_ops = 2;
}
@@ -669,7 +649,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
EVT Op0VT = Op0.getValueType();
EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
Op0VT, (128 / Op0VT.getSizeInBits()));
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
SDValue shufMask;
@@ -703,19 +683,19 @@ SPUDAGToDAGISel::Select(SDNode *N) {
}
SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
-
+
HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
Op0VecVT, Op0));
-
+
SDValue PromScalar;
if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
PromScalar = SDValue(N, 0);
else
PromScalar = PromoteScalar.getValue();
-
+
SDValue zextShuffle =
CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- PromScalar, PromScalar,
+ PromScalar, PromScalar,
SDValue(shufMaskLoad, 0));
HandleSDNode Dummy2(zextShuffle);
@@ -725,7 +705,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
zextShuffle = Dummy2.getValue();
HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
zextShuffle));
-
+
CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
SelectCode(Dummy.getValue().getNode());
return Dummy.getValue().getNode();
@@ -736,7 +716,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
N->getOperand(0), N->getOperand(1),
SDValue(CGLoad, 0)));
-
+
CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
return N;
@@ -748,7 +728,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
N->getOperand(0), N->getOperand(1),
SDValue(CGLoad, 0)));
-
+
CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
return N;
@@ -779,8 +759,8 @@ SPUDAGToDAGISel::Select(SDNode *N) {
if (shift_amt >= 32) {
SDNode *hi32 =
- CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT,
- Op0.getOperand(0));
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ Op0.getOperand(0), getRC(MVT::i32));
shift_amt -= 32;
if (shift_amt > 0) {
@@ -862,23 +842,12 @@ SPUDAGToDAGISel::Select(SDNode *N) {
SDValue Arg = N->getOperand(0);
SDValue Chain = N->getOperand(1);
SDNode *Result;
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-
- if (vtm->ldresult_ins == 0) {
- report_fatal_error("LDRESULT for unsupported type: " +
- Twine(VT.getEVTString()));
- }
-
- Opc = vtm->ldresult_ins;
- if (vtm->ldresult_imm) {
- SDValue Zero = CurDAG->getTargetConstant(0, VT);
-
- Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
- } else {
- Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
- }
+ Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+ MVT::Other, Arg,
+ getRC( VT.getSimpleVT()), Chain);
return Result;
+
} else if (Opc == SPUISD::IndirectAddr) {
// Look at the operands: SelectCode() will catch the cases that aren't
// specifically handled here.
@@ -904,10 +873,10 @@ SPUDAGToDAGISel::Select(SDNode *N) {
NewOpc = SPU::AIr32;
Ops[1] = Op1;
} else {
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
- N->getValueType(0),
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+ N->getValueType(0),
Op1),
- 0);
+ 0);
}
}
Ops[0] = Op0;
@@ -939,7 +908,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
SDNode *
SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
@@ -947,7 +916,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
SDValue SelMaskVal;
DebugLoc dl = N->getDebugLoc();
- VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
@@ -991,7 +961,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
SDValue(Shift, 0), SDValue(Bits, 0));
}
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
}
/*!
@@ -1012,7 +983,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
SDNode *VecOp0, *Shift = 0;
DebugLoc dl = N->getDebugLoc();
- VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
@@ -1058,7 +1030,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
SDValue(Shift, 0), SDValue(Bits, 0));
}
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
}
/*!
@@ -1072,21 +1045,23 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
SDNode *
SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
// Promote Op0 to vector
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
DebugLoc dl = N->getDebugLoc();
SDNode *VecOp0 =
- CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0));
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ VecVT, N->getOperand(0), getRC(MVT::v2i64));
SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
SDNode *SignRot =
CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
SDValue(VecOp0, 0), SignRotAmt);
SDNode *UpperHalfSign =
- CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
SDNode *UpperHalfSignMask =
CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
@@ -1133,7 +1108,8 @@ SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
SDValue(Shift, 0), SDValue(NegShift, 0));
}
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
}
/*!
@@ -1154,20 +1130,21 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
// Here's where it gets interesting, because we have to parse out the
// subtree handed back in i64vec:
- if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+ if (i64vec.getOpcode() == ISD::BITCAST) {
// The degenerate case where the upper and lower bits in the splat are
// identical:
SDValue Op0 = i64vec.getOperand(0);
ReplaceUses(i64vec, Op0);
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(Op0.getNode()), 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(Op0.getNode()), 0),
+ getRC(MVT::i64));
} else if (i64vec.getOpcode() == SPUISD::SHUFB) {
SDValue lhs = i64vec.getOperand(0);
SDValue rhs = i64vec.getOperand(1);
SDValue shufmask = i64vec.getOperand(2);
- if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+ if (lhs.getOpcode() == ISD::BITCAST) {
ReplaceUses(lhs, lhs.getOperand(0));
lhs = lhs.getOperand(0);
}
@@ -1176,7 +1153,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
? lhs.getNode()
: emitBuildVector(lhs.getNode()));
- if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+ if (rhs.getOpcode() == ISD::BITCAST) {
ReplaceUses(rhs, rhs.getOperand(0));
rhs = rhs.getOperand(0);
}
@@ -1185,7 +1162,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
? rhs.getNode()
: emitBuildVector(rhs.getNode()));
- if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+ if (shufmask.getOpcode() == ISD::BITCAST) {
ReplaceUses(shufmask, shufmask.getOperand(0));
shufmask = shufmask.getOperand(0);
}
@@ -1201,11 +1178,13 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
HandleSDNode Dummy(shufNode);
SDNode *SN = SelectCode(Dummy.getValue().getNode());
if (SN == 0) SN = Dummy.getValue().getNode();
-
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0));
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(SN, 0), getRC(MVT::i64));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(i64vec.getNode()), 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(i64vec.getNode()), 0),
+ getRC(MVT::i64));
} else {
report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
"condition");
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 46f3189..e6511d0 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1,4 +1,3 @@
-//
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +13,13 @@
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,41 +41,12 @@ using namespace llvm;
namespace {
std::map<unsigned, const char *> node_names;
- //! EVT mapping to useful data for Cell SPU
- struct valtype_map_s {
- EVT valtype;
- int prefslot_byte;
- };
-
- const valtype_map_s valtype_map[] = {
- { MVT::i1, 3 },
- { MVT::i8, 3 },
- { MVT::i16, 2 },
- { MVT::i32, 0 },
- { MVT::f32, 0 },
- { MVT::i64, 0 },
- { MVT::f64, 0 },
- { MVT::i128, 0 }
- };
-
- const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
- const valtype_map_s *getValueTypeMapEntry(EVT VT) {
- const valtype_map_s *retval = 0;
-
- for (size_t i = 0; i < n_valtype_map; ++i) {
- if (valtype_map[i].valtype == VT) {
- retval = valtype_map + i;
- break;
- }
- }
-
-#ifndef NDEBUG
- if (retval == 0) {
- report_fatal_error("getValueTypeMapEntry returns NULL for " +
- Twine(VT.getEVTString()));
- }
-#endif
+ // Byte offset of the preferred slot (counted from the MSB)
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
return retval;
}
@@ -125,8 +96,6 @@ namespace {
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()),
SPUTM(TM) {
- // Fold away setcc operations if possible.
- setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
@@ -376,10 +345,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
- setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
- setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -439,9 +408,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
// These operations need to be expanded:
setOperationAction(ISD::SDIV, VT, Expand);
@@ -502,8 +471,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
- node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
+ node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+ node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -531,10 +500,20 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
//===----------------------------------------------------------------------===//
MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i16 and i32 are valid SETCC result types
- return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
- VT.getSimpleVT().SimpleTy :
- MVT::i32);
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
}
//===----------------------------------------------------------------------===//
@@ -572,113 +551,174 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+ int pso = prefslotOffset(InVT);
DebugLoc dl = Op.getDebugLoc();
-
- switch (LN->getAddressingMode()) {
- case ISD::UNINDEXED: {
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if (alignment == 16) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
-
- if (rotamt < 0)
- rotamt += 16;
-
- rotate = DAG.getConstant(rotamt, MVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -vtm->prefslot_byte;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -vtm->prefslot_byte;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+ // two sanity checks
+ assert( LN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+  // cleanly aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
- DAG.getConstant(0, PtrVT));
+ DAG.getConstant((offset & ~0xf), PtrVT));
}
-
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
// Offset the rotate amount by the basePtr and the preferred slot
// byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
basePtr,
- DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+ DAG.getConstant(rotamt, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
- // Re-emit as a v16i8 vector load
- result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), LN->isNonTemporal(), 16);
+  // Do the load as an i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+  // When the size is not greater than the alignment, we get all of the
+  // data with just one load
+ if (alignment >= InVT.getSizeInBits()/8) {
// Update the chain
- the_chain = result.getValue(1);
+ the_chain = low.getValue(1);
// Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
- result.getValue(0), rotate);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
    // Convert the loaded i128 to the appropriate vector type
    // specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+  // When the alignment is less than the size, we might need two loads
+  // (known only at run-time)
+  // TODO: if the memory address is composed only of constants, we have
+  // extra knowledge, and might avoid the second load
+ else {
+    // storage position offset from the lower 16-byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+    // get a register full of ones. (this implementation is a workaround: LLVM
+    // cannot handle 128-bit signed int constants)
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+    // Shift the (possible) high part right to compensate for the
+    // misalignment. If there is no high part (i.e. the value is i64 and the
+    // offset is 4), this will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+    // Shift the low part similarly
+    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset);
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+ }
// Handle extending loads by extending the scalar result:
if (ExtType == ISD::SEXTLOAD) {
result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
@@ -702,21 +742,6 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
retops, sizeof(retops) / sizeof(retops[0]));
return result;
- }
- case ISD::PRE_INC:
- case ISD::PRE_DEC:
- case ISD::POST_INC:
- case ISD::POST_DEC:
- case ISD::LAST_INDEXED_MODE:
- {
- report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
- "than UNINDEXED\n" +
- Twine((unsigned)LN->getAddressingMode()));
- /*NOTREACHED*/
- }
- }
-
- return SDValue();
}
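
The unaligned-load path above merges two aligned quadword loads with byte
shifts; a minimal C++ sketch (illustrative names, not from the source) of the
arithmetic the SHL_BYTES / SRL_BYTES / OR sequence performs:

    #include <cstdint>

    // Merge two aligned 16-byte loads into one unaligned quadword.
    // 'lo'/'hi' are the aligned chunks; 'off' is basePtr & 0xf.
    static void mergeUnalignedLoad(const std::uint8_t lo[16],
                                   const std::uint8_t hi[16],
                                   unsigned off, std::uint8_t out[16]) {
      for (unsigned i = 0; i != 16; ++i)
        // (lo SHL_BYTES off) | (hi SRL_BYTES (16 - off))
        out[i] = (i + off < 16) ? lo[i + off] : hi[i + off - 16];
    }
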
/// Custom lower stores for CellSPU
@@ -734,93 +759,103 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+  // Get pointer info for the memory chunk(s) that contain the data to access
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+  // two sanity checks
+  assert(SN->getAddressingMode() == ISD::UNINDEXED
+         && "we should get only UNINDEXED addresses");
+  // cleanly aligned stores can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
- switch (SN->getAddressingMode()) {
- case ISD::UNINDEXED: {
- // The vector type we really want to load from the 16-byte chunk.
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if (alignment == 16) {
- ConstantSDNode *CN;
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
+ if ((offset & ~0xf) > 0) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
- DAG.getConstant(0, PtrVT));
+ DAG.getConstant((offset & ~0xf), PtrVT));
}
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- // Load the memory to which to store.
- alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- SN->getSrcValue(), SN->getSrcValueOffset(),
- SN->isVolatile(), SN->isNonTemporal(), 16);
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+
+  // If we don't need to store over the 16-byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
// Update the chain
- the_chain = alignLoadVec.getValue(1);
+ the_chain = low.getValue(1);
- LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ LoadSDNode *LN = cast<LoadSDNode>(low);
SDValue theValue = SN->getValue();
- SDValue result;
if (StVT != VT
&& (theValue.getOpcode() == ISD::AssertZext
@@ -844,48 +879,114 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, alignLoadVec,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, dl, result, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
+ lowMemPtr,
LN->isVolatile(), LN->isNonTemporal(),
- LN->getAlignment());
-
-#if 0 && !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- const SDValue &currentRoot = DAG.getRoot();
-
- DAG.setRoot(result);
- errs() << "------- CellSPU:LowerStore result:\n";
- DAG.dump();
- errs() << "-------\n";
- DAG.setRoot(currentRoot);
- }
-#endif
-
- return result;
- /*UNREACHED*/
- }
- case ISD::PRE_INC:
- case ISD::PRE_DEC:
- case ISD::POST_INC:
- case ISD::POST_DEC:
- case ISD::LAST_INDEXED_MODE:
- {
- report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
- "than UNINDEXED\n" +
- Twine((unsigned)SN->getAddressingMode()));
- /*NOTREACHED*/
- }
+ 16);
+
+ }
+  // Do the store when it might cross the 16-byte memory access boundary.
+  else {
+    // TODO: issue a warning if SN->isVolatile() == true? This is likely not
+    // what the user wanted.
+
+    // address offset from the nearest lower 16-byte aligned address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+    // get a register full of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+    // Create the 128-bit masks that have ones where the data to store is
+    // located.
+    SDValue lowmask, himask;
+    // If the value to store doesn't fill an entire 128 bits, zero out the
+    // trailing bits of the mask so that only the value we want to store is
+    // covered.
+    // This happens e.g. for a store of i32 with alignment 2.
+    if (!VT.isVector()) {
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+    // This will be zero if no data goes to the high quad
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+    // Merge the old data and the new data and store the results.
+    // Need to convert vectors here to integers, as 'OR'ing floats asserts
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
}
- return SDValue();
+ return result;
}
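
The unaligned-store path above is, likewise, a masked read-modify-write over
two aligned quadwords; a scalar C++ sketch (illustrative names) of what the
lowmask/himask selection computes:

    #include <cstdint>

    // Store n bytes of 'val' at byte offset 'off' across aligned chunks
    // 'lo' and 'hi'; bytes outside the masks keep their old contents.
    static void maskedStore(std::uint8_t lo[16], std::uint8_t hi[16],
                            unsigned off, const std::uint8_t *val,
                            unsigned n) {
      for (unsigned i = 0; i != n; ++i) {
        unsigned p = off + i;
        if (p < 16) lo[p] = val[i];       // covered by lowmask
        else        hi[p - 16] = val[i];  // covered by himask
      }
    }
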
//! Generate the address of a constant pool entry.
@@ -993,7 +1094,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(dbits, MVT::i64);
SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
}
return SDValue();
@@ -1013,9 +1114,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
- unsigned ArgOffset = SPUFrameInfo::minStackSize();
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1080,7 +1181,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// or we're forced to do vararg
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
ArgOffset += StackSlotSize;
}
@@ -1091,8 +1193,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// vararg handling:
if (isVarArg) {
- // FIXME: we should be able to query the argument registers from
- // tablegen generated code.
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
static const unsigned ArgRegs[] = {
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
@@ -1117,9 +1219,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
false, false, 0);
Chain = Store.getOperand(0);
MemOps.push_back(Store);
@@ -1163,14 +1265,14 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
- *DAG.getContext());
+ *DAG.getContext());
// FIXME: allow for other calling conventions
CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-
+
const unsigned NumArgRegs = ArgLocs.size();
@@ -1184,7 +1286,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Figure out which arguments are going to go in registers, and which in
// memory.
- unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
unsigned ArgRegIdx = 0;
// Keep track of registers passing arguments
@@ -1219,7 +1321,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
ArgOffset += StackSlotSize;
}
@@ -1230,7 +1333,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Accumulate how many bytes are to be pushed on the stack, including the
// linkage area, and parameter passing area. According to the SPU ABI,
// we minimally need space for [LR] and [SP].
- unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
// Insert a call sequence start
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
@@ -1311,7 +1414,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
&Ops[0], Ops.size());
InFlag = Chain.getValue(1);
@@ -1334,7 +1437,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// If the call has results, copy the values out of the ret val registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
-
+
SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
@@ -1567,7 +1670,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(Value32, MVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
break;
}
@@ -1577,7 +1680,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(f64val, MVT::i64);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
break;
}
@@ -1587,7 +1690,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SmallVector<SDValue, 8> Ops;
Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
}
case MVT::v8i16: {
@@ -1621,7 +1724,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
if (upper == lower) {
// Magic constant that can be matched by IL, ILA, et. al.
SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Val, Val, Val, Val));
} else {
@@ -1650,7 +1753,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
// Create lower vector if not a special pattern
if (!lower_special) {
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
LO32C, LO32C, LO32C, LO32C));
}
@@ -1658,7 +1761,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
// Create upper vector if not a special pattern
if (!upper_special) {
SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
HI32C, HI32C, HI32C, HI32C));
}
@@ -1735,14 +1838,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
unsigned PrevElt = 0;
- unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+  int rotamt = 0;
EVT maskVT; // which of the c?d instructions to use
if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
- maskVT = MVT::v16i8;
+ maskVT = MVT::v16i8;
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
maskVT = MVT::v8i16;
@@ -1758,7 +1861,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned i = 0; i != MaxElts; ++i) {
if (SVN->getMaskElt(i) < 0)
continue;
-
+
unsigned SrcElt = SVN->getMaskElt(i);
if (monotonic) {
@@ -1782,13 +1885,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
PrevElt = SrcElt;
- if (SrcElt == 0)
- V0Elt = i;
} else {
rotate = false;
}
- } else if (i == 0) {
- // First time through, need to keep track of previous element
+    } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
+      // First time or after a "wrap around"
+      rotamt = SrcElt - i;
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
@@ -1806,15 +1908,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
DAG.getConstant(V2EltOffset, MVT::i32));
- SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
} else if (rotate) {
- int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+    if (rotamt < 0)
+      rotamt += MaxElts;
+    rotamt *= EltVT.getSizeInBits() / 8;
return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
V1, DAG.getConstant(rotamt, MVT::i16));
} else {
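
A worked example of the rotate bookkeeping above (values chosen for
illustration): the v4i32 shuffle mask <1,2,3,0> sets rotamt = SrcElt - i = 1
on the first element, accepts the wrap from 3 to 0, and scales to bytes:

    // Sketch of the amount computation for mask <1,2,3,0> on v4i32:
    int rotamt = 1 - 0;           // SrcElt - i at the first element
    if (rotamt < 0) rotamt += 4;  // += MaxElts (not needed here)
    rotamt *= 32 / 8;             // element bits / 8 => rotate left 4 bytes
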
@@ -1999,7 +2102,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(scaleShift, MVT::i32));
}
- vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
// Replicate the bytes starting at byte 0 across the entire vector (for
// consistency with the notion of a unified register set)
@@ -2069,7 +2172,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DAG.getRegister(SPU::R1, PtrVT),
DAG.getConstant(Offset, PtrVT));
// widen the mask when dealing with half vectors
- EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
128/ VT.getVectorElementType().getSizeInBits());
SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
@@ -2077,7 +2180,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(SPUISD::SHUFB, dl, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
VecOp,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
return result;
}
@@ -2197,12 +2300,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
ConstVec = Op.getOperand(0);
Arg = Op.getOperand(1);
if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
ConstVec = ConstVec.getOperand(0);
} else {
ConstVec = Op.getOperand(1);
Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
ConstVec = ConstVec.getOperand(0);
}
}
@@ -2243,7 +2346,7 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
@@ -2419,7 +2522,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
// selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
SDValue lhsHi32 =
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2453,7 +2556,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
ISD::SETGT));
}
- SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
SDValue rhsHi32 =
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2567,7 +2670,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
// Type to truncate to
EVT VT = Op.getValueType();
MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
@@ -2575,7 +2678,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
// Create shuffle mask, least significant doubleword of quadword
unsigned maskHigh = 0x08090a0b;
unsigned maskLow = 0x0c0d0e0f;
@@ -2616,6 +2719,12 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType().getSimpleVT();
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
// The type to extend to needs to be a i128 and
// the type to extend from needs to be i64 or i32.
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
@@ -2640,12 +2749,17 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
DAG.getConstant(31, MVT::i32));
+  // Reinterpret as an i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
// Shuffle bytes - Copy the sign bits into the upper 64 bits
// and the input value into the lower 64 bits.
SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
}
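
The sequence above broadcasts the sign with an arithmetic shift, then splices
sign and value together with SHUFB; in scalar terms (a C++ sketch, big-endian
doubleword order):

    #include <cstdint>

    // i64 -> i128 sign extension as the DAG code performs it.
    static void sext64to128(std::int64_t v, std::int64_t out[2]) {
      out[0] = v >> 63;  // sraVal: a doubleword of sign bits (upper half)
      out[1] = v;        // extended: the original value (lower half)
    }
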
//! Custom (target-specific) lowering entry point
@@ -2903,8 +3017,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
}
break;
}
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
@@ -2982,6 +3096,38 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
return TargetLowering::getConstraintType(ConstraintLetter);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+  // FIXME: Seems like the supported constraint letters were just copied
+  // from PPC, as the following doesn't correspond to the GCC docs.
+  // I'm leaving it as-is until someone adds the corresponding lowering
+  // support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const
@@ -3086,3 +3232,28 @@ SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The SPU target isn't yet aware of offsets.
return false;
}
+
+// Can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  // ceqi, cgti, etc. all take an s10 operand
+  return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                         const Type *) const {
+
+  // A-form: 18-bit absolute address.
+  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+    return true;
+
+  // D-form: reg + 14-bit signed offset
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 &&
+      isInt<14>(AM.BaseOffs))
+    return true;
+
+  // X-form: reg + reg
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+    return true;
+
+  return false;
+}
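
For reference, a C++ sketch of the ranges the two predicates above accept
(derived from the code, with a hypothetical helper name):

    // s10 immediate range accepted by isLegalICmpImmediate:
    static bool fitsS10(long long imm) { return imm >= -512 && imm <= 511; }
    // fitsS10(511) and fitsS10(-512) hold; fitsS10(512) does not.
    // Addressing modes accepted: A-form @sym (18-bit absolute),
    // D-form d(reg) with signed 14-bit d, and X-form reg+reg.
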
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 6d3c90b..95d44af 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -41,8 +41,9 @@ namespace llvm {
CNTB, ///< Count leading ones in bytes
PREFSLOT2VEC, ///< Promote scalar->vector
VEC2PREFSLOT, ///< Extract element 0
- SHLQUAD_L_BITS, ///< Rotate quad left, by bits
- SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
+ SHL_BITS, ///< Shift quad left, by bits
+ SHL_BYTES, ///< Shift quad left, by bytes
+ SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
VEC_ROTL, ///< Vector rotate left
VEC_ROTR, ///< Vector rotate right
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
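
A small C++ model of the renamed quad shifts (a sketch; byte granularity,
zeros shifted in, byte 0 being most significant on the big-endian SPU):

    #include <cstdint>

    // SHL_BYTES on a 16-byte quad; SRL_BYTES is the mirror image.
    static void shlBytes(std::uint8_t q[16], unsigned n) {
      for (unsigned i = 0; i != 16; ++i)
        q[i] = (i + n < 16) ? q[i + n] : 0;  // "left" = toward byte 0
    }
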
@@ -129,6 +130,11 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -170,6 +176,19 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+
+    /// After allocating this many registers, the allocator should feel
+    /// register pressure. The value is a rough guess, based on the number
+    /// of non-callee-saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                         MachineFunction &MF) const {
+ return 50;
+ }
};
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 26d6b4f..f9e6c72 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -16,6 +16,7 @@
#include "SPUInstrBuilder.h"
#include "SPUTargetMachine.h"
#include "SPUGenInstrInfo.inc"
+#include "SPUHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -54,6 +55,16 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new SPUHazardRecognizer(*TII);
+}
+
unsigned
SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
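
For context, a scheduler drives the recognizer roughly as in this C++ sketch
(assumed ScheduleHazardRecognizer interface of this period; scheduleNow is a
hypothetical helper):

    // Consult the hazard recognizer before issuing an instruction.
    void tryIssue(ScheduleHazardRecognizer *HazardRec, SUnit *SU) {
      if (HazardRec->getHazardType(SU) == ScheduleHazardRecognizer::NoHazard) {
        scheduleNow(SU);                // hypothetical: emit the instruction
        HazardRec->EmitInstruction(SU); // tell the recognizer what was issued
      } else {
        HazardRec->AdvanceCycle();      // stall a cycle and retry later
      }
    }
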
@@ -129,7 +140,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
if (RC == SPU::GPRCRegisterClass) {
opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
} else if (RC == SPU::R64CRegisterClass) {
@@ -164,7 +175,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
if (RC == SPU::GPRCRegisterClass) {
opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
} else if (RC == SPU::R64CRegisterClass) {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 191e55d..e5e9148 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,6 +32,10 @@ namespace llvm {
///
virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index ca0fe00..25f6fd0 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -416,7 +416,7 @@ multiclass ImmLoadAddress
def lo: ILARegInst<R32C, symbolLo, imm18>;
def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
- [/* no pattern */]>;
+ [(set R32C:$rT, imm18:$val)]>;
}
defm ILA : ImmLoadAddress;
@@ -1167,10 +1167,10 @@ class XSHWRegInst<RegisterClass rclass>:
[(set rclass:$rDest, (sext R16C:$rSrc))]>;
multiclass ExtendHalfwordWord {
- def v4i32: XSHWVecInst<v4i32, v8i16>;
-
+ def v4i32: XSHWVecInst<v8i16, v4i32>;
+
def r16: XSHWRegInst<R32C>;
-
+
def r32: XSHWInRegInst<R32C,
[(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
@@ -1385,59 +1385,6 @@ class ORRegInst<RegisterClass rclass>:
ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
[(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
-// ORCvtForm: OR conversion form
-//
-// This is used to "convert" the preferred slot to its vector equivalent, as
-// well as convert a vector back to its preferred slot.
-//
-// These are effectively no-ops, but need to exist for proper type conversion
-// and type coercion.
-
-class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
- : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = 0b10000010000;
- let Inst{11-17} = RA;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-class ORPromoteScalar<RegisterClass rclass>:
- ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
-
-class ORExtractElt<RegisterClass rclass>:
- ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
-
-/* class ORCvtRegGPRC<RegisterClass rclass>:
- ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
-
-/* class ORCvtGPRCReg<RegisterClass rclass>:
- ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
-
-class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
-
-class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
-
-class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtGPRCVec:
- ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
-
-class ORCvtVecGPRC:
- ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
-
-class ORCvtVecVec:
- ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
multiclass BitwiseOr
{
@@ -1468,119 +1415,48 @@ multiclass BitwiseOr
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
[/* no pattern */]>;
-
- // scalar->vector promotion, prefslot2vec:
- def v16i8_i8: ORPromoteScalar<R8C>;
- def v8i16_i16: ORPromoteScalar<R16C>;
- def v4i32_i32: ORPromoteScalar<R32C>;
- def v2i64_i64: ORPromoteScalar<R64C>;
- def v4f32_f32: ORPromoteScalar<R32FP>;
- def v2f64_f64: ORPromoteScalar<R64FP>;
-
- // vector->scalar demotion, vec2prefslot:
- def i8_v16i8: ORExtractElt<R8C>;
- def i16_v8i16: ORExtractElt<R16C>;
- def i32_v4i32: ORExtractElt<R32C>;
- def i64_v2i64: ORExtractElt<R64C>;
- def f32_v4f32: ORExtractElt<R32FP>;
- def f64_v2f64: ORExtractElt<R64FP>;
-
- // Conversion from vector to GPRC
- def i128_vec: ORCvtVecGPRC;
-
- // Conversion from GPRC to vector
- def vec_i128: ORCvtGPRCVec;
-
-/*
- // Conversion from register to GPRC
- def i128_r64: ORCvtRegGPRC<R64C>;
- def i128_f64: ORCvtRegGPRC<R64FP>;
- def i128_r32: ORCvtRegGPRC<R32C>;
- def i128_f32: ORCvtRegGPRC<R32FP>;
- def i128_r16: ORCvtRegGPRC<R16C>;
- def i128_r8: ORCvtRegGPRC<R8C>;
-
- // Conversion from GPRC to register
- def r64_i128: ORCvtGPRCReg<R64C>;
- def f64_i128: ORCvtGPRCReg<R64FP>;
- def r32_i128: ORCvtGPRCReg<R32C>;
- def f32_i128: ORCvtGPRCReg<R32FP>;
- def r16_i128: ORCvtGPRCReg<R16C>;
- def r8_i128: ORCvtGPRCReg<R8C>;
-*/
-/*
- // Conversion from register to R32C:
- def r32_r16: ORCvtFormRegR32<R16C>;
- def r32_r8: ORCvtFormRegR32<R8C>;
-
- // Conversion from R32C to register
- def r32_r16: ORCvtFormR32Reg<R16C>;
- def r32_r8: ORCvtFormR32Reg<R8C>;
-*/
-
- // Conversion from R64C to register:
- def r32_r64: ORCvtFormR64Reg<R32C>;
- // def r16_r64: ORCvtFormR64Reg<R16C>;
- // def r8_r64: ORCvtFormR64Reg<R8C>;
-
- // Conversion to R64C from register:
- def r64_r32: ORCvtFormRegR64<R32C>;
- // def r64_r16: ORCvtFormRegR64<R16C>;
- // def r64_r8: ORCvtFormRegR64<R8C>;
-
- // bitconvert patterns:
- def r32_f32: ORCvtFormR32Reg<R32FP,
- [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
- def f32_r32: ORCvtFormRegR32<R32FP,
- [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
-
- def r64_f64: ORCvtFormR64Reg<R64FP,
- [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
- def f64_r64: ORCvtFormRegR64<R64FP,
- [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
}
defm OR : BitwiseOr;
-// scalar->vector promotion patterns (preferred slot to vector):
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
- (ORv16i8_i8 R8C:$rA)>;
+ (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
- (ORv8i16_i16 R16C:$rA)>;
+ (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
- (ORv4i32_i32 R32C:$rA)>;
+ (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
- (ORv2i64_i64 R64C:$rA)>;
+ (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
- (ORv4f32_f32 R32FP:$rA)>;
+ (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
- (ORv2f64_f64 R64FP:$rA)>;
-
-// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
-// known as converting the vector back to its preferred slot
-
-def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
- (ORi8_v16i8 VECREG:$rA)>;
+ (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
-def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
- (ORi16_v8i16 VECREG:$rA)>;
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
-def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
- (ORi32_v4i32 VECREG:$rA)>;
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
-def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
- (ORi64_v2i64 VECREG:$rA)>;
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
-def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
- (ORf32_v4f32 VECREG:$rA)>;
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
-def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
- (ORf64_v2f64 VECREG:$rA)>;
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
// Load Register: This is an assembler alias for a bitwise OR of a register
// against itself. It's here because it brings some clarity to assembly
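
The rewrites above turn what used to be real OR instructions into
register-class copies the coalescer can delete outright. The C++ equivalent,
mirroring the LowerSIGN_EXTEND hunk earlier (a sketch, illustrative variable
names):

    // Re-interpret 'Val' in the GPRC register class without emitting code;
    // the resulting COPY is normally coalesced away.
    SDValue AsGPRC = SDValue(
        DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
                           Val.getValueType(), Val,
                           DAG.getTargetConstant(SPU::GPRCRegClass.getID(),
                                                 MVT::i32)), 0);
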
@@ -2093,7 +1969,7 @@ defm EQV: BitEquivalence;
class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
- IntegerOp, pattern>;
+ ShuffleOp, pattern>;
class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
@@ -2134,7 +2010,7 @@ defm SHUFB : ShuffleBytes;
class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class SHLHVecInst<ValueType vectype>:
SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
@@ -2156,7 +2032,7 @@ defm SHLH : ShiftLeftHalfword;
class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class SHLHIVecInst<ValueType vectype>:
SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
@@ -2182,7 +2058,7 @@ def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
multiclass ShiftLeftWord
{
@@ -2201,7 +2077,7 @@ defm SHL: ShiftLeftWord;
class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
multiclass ShiftLeftWordImm
{
@@ -2230,7 +2106,7 @@ defm SHLI : ShiftLeftWordImm;
class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBIVecInst<ValueType vectype>:
SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2259,7 +2135,7 @@ defm SHLQBI : ShiftLeftQuadByBits;
// enforcement, whereas with SHLQBI, we have to "take it on faith."
class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBIIVecInst<ValueType vectype>:
SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
@@ -2283,7 +2159,7 @@ defm SHLQBII : ShiftLeftQuadByBitsImm;
class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBYVecInst<ValueType vectype>:
SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2306,7 +2182,7 @@ defm SHLQBY: ShiftLeftQuadBytes;
class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBYIVecInst<ValueType vectype>:
SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
@@ -2330,7 +2206,7 @@ defm SHLQBYI : ShiftLeftQuadBytesImm;
class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBYBIVecInst<ValueType vectype>:
SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2359,7 +2235,7 @@ defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTHVecInst<ValueType vectype>:
ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
@@ -2386,7 +2262,7 @@ def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTHIVecInst<ValueType vectype>:
ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
@@ -2413,7 +2289,7 @@ def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTVecInst<ValueType vectype>:
ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2461,7 +2337,7 @@ def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
@@ -2491,12 +2367,15 @@ defm ROTI : RotateLeftWordImm;
class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
-class ROTQBYVecInst<ValueType vectype>:
- ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>;
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+ ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+ [(set (type rc:$rT),
+ (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+ ROTQBYGenInst<type, VECREG>;
multiclass RotateQuadLeftByBytes
{
@@ -2506,6 +2385,7 @@ multiclass RotateQuadLeftByBytes
def v4f32: ROTQBYVecInst<v4f32>;
def v2i64: ROTQBYVecInst<v2i64>;
def v2f64: ROTQBYVecInst<v2f64>;
+ def i128: ROTQBYGenInst<i128, GPRC>;
}
defm ROTQBY: RotateQuadLeftByBytes;
@@ -2516,12 +2396,15 @@ defm ROTQBY: RotateQuadLeftByBytes;
class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+ ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+ [(set (type rclass:$rT),
+ (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
class ROTQBYIVecInst<ValueType vectype>:
- ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+ ROTQBYIGenInst<vectype, VECREG>;
multiclass RotateQuadByBytesImm
{
@@ -2531,6 +2414,7 @@ multiclass RotateQuadByBytesImm
def v4f32: ROTQBYIVecInst<v4f32>;
def v2i64: ROTQBYIVecInst<v2i64>;
def vfi64: ROTQBYIVecInst<v2f64>;
+ def i128: ROTQBYIGenInst<i128, GPRC>;
}
defm ROTQBYI: RotateQuadByBytesImm;
@@ -2539,7 +2423,7 @@ defm ROTQBYI: RotateQuadByBytesImm;
class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00110011100, OOL, IOL,
"rotqbybi\t$rT, $rA, $shift",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
@@ -2564,7 +2448,7 @@ defm ROTQBYBI : RotateQuadByBytesByBitshift;
class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQBIVecInst<ValueType vectype>:
ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2589,7 +2473,7 @@ defm ROTQBI: RotateQuadByBitCount;
class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
PatLeaf pred>:
@@ -2624,7 +2508,7 @@ defm ROTQBII : RotateQuadByBitCountImm;
class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
def ROTHMv8i16:
ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2666,7 +2550,7 @@ def : Pat<(srl R16C:$rA, R8C:$rB),
class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
def ROTHMIv8i16:
ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
@@ -2697,7 +2581,7 @@ def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
// ROTM v4i32 form: See the ROTHM v8i16 comments.
class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
def ROTMv4i32:
ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2732,7 +2616,7 @@ def : Pat<(srl R32C:$rA, R8C:$rB),
// ROTMI v4i32 form: See the comment for ROTHM v8i16.
def ROTMIv4i32:
RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotateShift,
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
[(set (v4i32 VECREG:$rT),
(SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
@@ -2745,7 +2629,7 @@ def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
// ROTMI r32 form: know how to complement the immediate value.
def ROTMIr32:
RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotateShift,
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
[(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
def : Pat<(srl R32C:$rA, (i16 imm:$val)),
@@ -2762,7 +2646,7 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)),
class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBYVecInst<ValueType vectype>:
ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2785,9 +2669,13 @@ multiclass RotateQuadBytes
defm ROTQMBY : RotateQuadBytes;
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+ (ROTQMBYr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0))>;
+
class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBYIVecInst<ValueType vectype>:
ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
@@ -2827,7 +2715,7 @@ defm ROTQMBYI : RotateQuadBytesImm;
class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBYBIVecInst<ValueType vectype>:
ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2839,6 +2727,8 @@ multiclass RotateMaskQuadByBitCount
def v8i16: ROTQMBYBIVecInst<v8i16>;
def v4i32: ROTQMBYBIVecInst<v4i32>;
def v2i64: ROTQMBYBIVecInst<v2i64>;
+ def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [/*no pattern*/]>;
}
defm ROTQMBYBI: RotateMaskQuadByBitCount;
@@ -2850,7 +2740,7 @@ defm ROTQMBYBI: RotateMaskQuadByBitCount;
class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBIVecInst<ValueType vectype>:
ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2873,13 +2763,19 @@ multiclass RotateMaskQuadByBits
defm ROTQMBI: RotateMaskQuadByBits;
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+ (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0)),
+ (SFIr32 R32C:$rB, 0))>;
+
+
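
The srl pattern above splits a variable 128-bit shift into a bit part and a
byte part, both fed the negated count (SFIr32 $rB, 0 computes 0 - $rB, the
form the rotate-and-mask instructions expect). Its net effect, as a C++ sketch
(unsigned __int128 is a GCC/Clang extension):

    // Net effect of the selected ROTQMBI + ROTQMBYBI pair, for n < 128:
    static unsigned __int128 srl128(unsigned __int128 a, unsigned n) {
      a >>= n % 8;      // ROTQMBIr128: bit portion of the count
      a >>= n - n % 8;  // ROTQMBYBIr128: byte portion of the count
      return a;
    }
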
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate quad and mask by bits, immediate
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBIIVecInst<ValueType vectype>:
ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
@@ -2907,7 +2803,7 @@ defm ROTQMBII: RotateMaskQuadByBitsImm;
def ROTMAHv8i16:
RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotateShift,
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB),
@@ -2923,7 +2819,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB),
def ROTMAHr16:
RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotateShift,
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(sra R16C:$rA, R32C:$rB),
@@ -2939,7 +2835,7 @@ def : Pat<(sra R16C:$rA, R8C:$rB),
def ROTMAHIv8i16:
RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- "rotmahi\t$rT, $rA, $val", RotateShift,
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
[(set (v8i16 VECREG:$rT),
(SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
@@ -2951,7 +2847,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
def ROTMAHIr16:
RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
- "rotmahi\t$rT, $rA, $val", RotateShift,
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
[(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
def : Pat<(sra R16C:$rA, (i32 imm:$val)),
@@ -2962,7 +2858,7 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)),
def ROTMAv4i32:
RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotateShift,
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB),
@@ -2978,7 +2874,7 @@ def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB),
def ROTMAr32:
RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotateShift,
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(sra R32C:$rA, R32C:$rB),
@@ -2995,7 +2891,7 @@ def : Pat<(sra R32C:$rA, R8C:$rB),
class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011110000, OOL, IOL,
"rotmai\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
@@ -4010,7 +3906,7 @@ def FCGTf32 :
"fcgt\t$rT, $rA, $rB", SPrecFP,
[(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
-def : Pat<(setugt R32FP:$rA, R32FP:$rB),
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
(FCGTf32 R32FP:$rA, R32FP:$rB)>;
def FCMGTf32 :
@@ -4018,7 +3914,7 @@ def FCMGTf32 :
"fcmgt\t$rT, $rA, $rB", SPrecFP,
[(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
(FCMGTf32 R32FP:$rA, R32FP:$rB)>;
//--------------------------------------------------------------------------
@@ -4320,7 +4216,7 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
// in the odd pipeline)
//===----------------------------------------------------------------------===//
-def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> {
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
let Pattern = [];
let Inst{0-10} = 0b10000000010;
@@ -4379,30 +4275,43 @@ def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
- (v16i8 (ORvec_i128 GPRC:$src))>;
+ (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
- (v8i16 (ORvec_i128 GPRC:$src))>;
+ (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
- (v4i32 (ORvec_i128 GPRC:$src))>;
+ (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
- (v2i64 (ORvec_i128 GPRC:$src))>;
+ (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
- (v4f32 (ORvec_i128 GPRC:$src))>;
+ (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
- (v2f64 (ORvec_i128 GPRC:$src))>;
+ (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
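
These bitconvert rewrites rely on ISD::BITCAST being a pure bit
reinterpretation: on SPU, scalar integer and floating-point values live in
the same 128-bit register file, so a COPY_TO_REGCLASS (which the register
coalescer can usually fold away entirely) replaces the old OR-based moves
that emitted a real instruction. For reference, the C++ equivalent of the
semantics (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Bit-preserving reinterpretation: no value conversion happens,
    // which is why a plain register-class copy implements it.
    float bitcastToF32(uint32_t bits) {
      float f;
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }
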
//===----------------------------------------------------------------------===//
// Instruction patterns:
@@ -4453,11 +4362,12 @@ def : Pat<(i32 (zext R8C:$rSrc)),
// zext 8->64: Zero extend bytes to double words
def : Pat<(i64 (zext R8C:$rSrc)),
- (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
- (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+ (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+ (COPY_TO_REGCLASS
+ (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
0x4),
(ILv4i32 0x0),
- (FSMBIv4i32 0x0f0f)))>;
+ (FSMBIv4i32 0x0f0f)), R64C)>;
// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
def : Pat<(i16 (anyext R8C:$rSrc)),
@@ -4465,7 +4375,7 @@ def : Pat<(i16 (anyext R8C:$rSrc)),
// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
def : Pat<(i32 (anyext R8C:$rSrc)),
- (ORIi8i32 R8C:$rSrc, 0)>;
+ (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
// sext 16->64: Sign extend halfword to double word
def : Pat<(sext_inreg R64C:$rSrc, i16),
@@ -4489,7 +4399,7 @@ def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
// anyext 16->32: Extend 16->32 bits, irrespective of sign
def : Pat<(i32 (anyext R16C:$rSrc)),
- (ORIi16i32 R16C:$rSrc, 0)>;
+ (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
//===----------------------------------------------------------------------===//
// Truncates:
@@ -4498,61 +4408,61 @@ def : Pat<(i32 (anyext R16C:$rSrc)),
//===----------------------------------------------------------------------===//
def : Pat<(i8 (trunc GPRC:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
def : Pat<(i8 (trunc R64C:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBv2i64_m32
- (ORv2i64_i64 R64C:$src),
- (ORv2i64_i64 R64C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
def : Pat<(i8 (trunc R32C:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBv4i32_m32
- (ORv4i32_i32 R32C:$src),
- (ORv4i32_i32 R32C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
def : Pat<(i8 (trunc R16C:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBv4i32_m32
- (ORv8i16_i16 R16C:$src),
- (ORv8i16_i16 R16C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
def : Pat<(i16 (trunc GPRC:$src)),
- (ORi16_v8i16
+ (COPY_TO_REGCLASS
(SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
def : Pat<(i16 (trunc R64C:$src)),
- (ORi16_v8i16
+ (COPY_TO_REGCLASS
(SHUFBv2i64_m32
- (ORv2i64_i64 R64C:$src),
- (ORv2i64_i64 R64C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
def : Pat<(i16 (trunc R32C:$src)),
- (ORi16_v8i16
+ (COPY_TO_REGCLASS
(SHUFBv4i32_m32
- (ORv4i32_i32 R32C:$src),
- (ORv4i32_i32 R32C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
def : Pat<(i32 (trunc GPRC:$src)),
- (ORi32_v4i32
+ (COPY_TO_REGCLASS
(SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
def : Pat<(i32 (trunc R64C:$src)),
- (ORi32_v4i32
+ (COPY_TO_REGCLASS
(SHUFBv2i64_m32
- (ORv2i64_i64 R64C:$src),
- (ORv2i64_i64 R64C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
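
All of the truncation patterns above follow one recipe: the ILHUv4i32 /
IOHLv4i32 pair materializes a 16-byte SHUFB control constant whose byte
entries select source bytes, so the repeated 0x03 selector, for example,
picks byte 3 of the quadword, the low byte of a 32-bit scalar held in the
big-endian preferred slot. A rough model of the per-byte selection, under
those layout assumptions (illustrative sketch, not part of the patch; only
the 0x00-0x1f control range is modeled):

    #include <cstdint>

    // One SHUFB-style control byte applied to two concatenated 16-byte
    // sources (the patterns above pass the same register twice).
    uint8_t shufbSelect(const uint8_t a[16], const uint8_t b[16],
                        uint8_t control) {
      unsigned idx = control & 0x1f;        // low 5 bits index a:b
      return idx < 16 ? a[idx] : b[idx - 16];
    }
    // With an i32 in bytes 0..3, control 0x03 returns its low byte,
    // i.e. the i32 -> i8 truncation above.
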
//===----------------------------------------------------------------------===//
// Address generation: SPU, like PPC, has to split addresses into high and
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 25ba88a..99aaeb0 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -24,9 +24,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- // Has leb128, .loc and .file
+ // Has leb128
HasLEB128 = true;
- HasDotLocAndDotFile = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 647da30..a6e621f 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -19,16 +19,16 @@ def SPU_GenControl : SDTypeProfile<1, 1, []>;
def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// Operand constraints:
//===----------------------------------------------------------------------===//
def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// Operand type constraints for vector shuffle/permute operations
@@ -83,10 +83,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
// SPUISelLowering.h):
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
-// Shift left quadword by bits and bytes
-def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
-def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
-
// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
@@ -105,6 +101,12 @@ def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
SPUvecshift_type>;
+// Shift entire quad left by bytes/bits. Zeros are shifted in on the right
+// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
// SPU form select mask for bytes, immediate
def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
@@ -154,4 +156,4 @@ class NoEncode<string E> {
//===----------------------------------------------------------------------===//
def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 0000000..e2bd2d7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual-issue requirements. The actual dual-issue scheduling is
+// done (TODO: nowhere, currently).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct SPUNopFiller : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData *IID;
+ bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+
+ static char ID;
+ SPUNopFiller(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+ IID(tm.getInstrItineraryData())
+ {
+ DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+ }
+
+ virtual const char *getPassName() const {
+ return "SPU nop/lnop Filler";
+ }
+
+ void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ isEvenPlace = true; //all functions get an .align 3 directive at start
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ runOnMachineBasicBlock(*FI);
+ return true; //never-ever do any more modifications, just print it!
+ }
+
+ typedef enum { none = 0, // no more instructions in this function / BB
+ pseudo = 1, // this does not get executed
+ even = 2,
+ odd = 3 } SPUOpPlace;
+ SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+ };
+ char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+ assert( isEvenPlace && "basic block starts at an odd address");
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ {
+ SPUOpPlace this_optype, next_optype;
+ MachineBasicBlock::iterator J = I;
+ J++;
+
+ this_optype = getOpPlacement( *I );
+ next_optype = none;
+ while (J!=MBB.end()){
+ next_optype = getOpPlacement( *J );
+ ++J;
+ if (next_optype != pseudo )
+ break;
+ }
+
+ // pad: odd(wrong), even(wrong), ...
+ // to: nop(corr), odd(corr), even(corr)...
+ if( isEvenPlace && this_optype == odd && next_optype == even ) {
+ DEBUG( dbgs() <<"Adding NOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+ isEvenPlace=false;
+ }
+
+ // pad: even(wrong), odd(wrong), ...
+ // to: lnop(corr), even(corr), odd(corr)...
+ else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+ DEBUG( dbgs() <<"Adding LNOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+ isEvenPlace=true;
+ }
+
+ // now go to next mem slot
+ if( this_optype != pseudo )
+ isEvenPlace = !isEvenPlace;
+
+ }
+
+ // pad basic block end
+ if( !isEvenPlace ){
+ MachineBasicBlock::iterator J = MBB.end();
+ J--;
+ if (getOpPlacement( *J ) == odd) {
+ DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+ BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+ }
+ else {
+ J++;
+ DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+ BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+ }
+ isEvenPlace=true;
+ }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+ int sc = instr.getDesc().getSchedClass();
+ const InstrStage *stage = IID->beginStage(sc);
+ unsigned FUs = stage->getUnits();
+ SPUOpPlace retval;
+
+ switch( FUs ) {
+ case 0: retval = pseudo; break;
+ case 1: retval = odd; break;
+ case 2: retval = even; break;
+ default: retval= pseudo;
+ assert( false && "got unknown FuncUnit\n");
+ break;
+ };
+ return retval;
+}
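
For reference, the filling policy above condenses to a few lines: an even
nop (ENOP) or odd lnop (LNOP) is inserted only when the current instruction
and the next real one would both land in the wrong slot parity, and the
block is padded back to even alignment at its end. A simplified standalone
model of that rule (illustrative only; pseudo-instructions are omitted and
fillNops is a hypothetical name):

    #include <cstddef>
    #include <string>
    #include <vector>

    enum Slot { Even, Odd };

    // ops holds the pipeline preference of each real instruction.
    std::vector<std::string> fillNops(const std::vector<Slot> &ops) {
      std::vector<std::string> out;
      bool evenPlace = true;                  // blocks start even-aligned
      for (std::size_t i = 0; i < ops.size(); ++i) {
        bool pair = i + 1 < ops.size();
        if (evenPlace && ops[i] == Odd && pair && ops[i + 1] == Even) {
          out.push_back("nop");               // ENOP occupies the even slot
          evenPlace = false;
        } else if (!evenPlace && ops[i] == Even && pair && ops[i + 1] == Odd) {
          out.push_back("lnop");              // LNOP occupies the odd slot
          evenPlace = true;
        }
        out.push_back(ops[i] == Even ? "even-op" : "odd-op");
        evenPlace = !evenPlace;               // advance one fetch slot
      }
      if (!evenPlace) {                       // pad the block end to even
        if (!out.empty() && out.back() == "odd-op")
          out.insert(out.end() - 1, "nop");   // shift the odd op into place
        else
          out.push_back("lnop");
      }
      return out;
    }
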
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index e1a0358..96cde51 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -143,7 +143,7 @@ def immU16 : PatLeaf<(imm), [{
def imm18 : PatLeaf<(imm), [{
// imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
int Value = (int) N->getZExtValue();
- return ((Value & ((1 << 19) - 1)) == Value);
+ return isUInt<18>(Value);
}]>;
def lo16 : PatLeaf<(imm), [{
@@ -203,7 +203,7 @@ def FPimm_sext16 : SDNodeXForm<fpimm, [{
def FPimm_u18 : SDNodeXForm<fpimm, [{
float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & ((1 << 19) - 1));
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
}]>;
def fpimmSExt16 : PatLeaf<(fpimm), [{
@@ -225,7 +225,7 @@ def hi16_f32 : PatLeaf<(fpimm), [{
def fpimm18 : PatLeaf<(fpimm), [{
if (N->getValueType(0) == MVT::f32) {
uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
- return ((Value & ((1 << 19) - 1)) == Value);
+ return isUInt<18>(Value);
}
return false;
@@ -654,7 +654,11 @@ def memrr : Operand<iPTR> {
// A-form : abs (256K LSA offset)
// D-form(2): [r+I7] (7-bit signed offset + reg)
-def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr", [], []>;
-def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr", [], []>;
-def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr", [], []>;
-def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr", [], []>;
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+ [], [SDNPWantRoot]>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+ [], [SDNPWantRoot]>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+ [], [SDNPWantRoot]>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+ [], [SDNPWantRoot]>;
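
The imm18 and fpimm18 changes above fix an off-by-one in the width check:
the old mask (1 << 19) - 1 spans 19 bits, so a value with bit 18 set, such
as 0x40000, wrongly passed as an "18-bit unsigned" immediate. A reference
check with the same semantics as llvm::isUInt<18> (illustrative only;
isUIntRef is a hypothetical name):

    #include <cstdint>

    // Equivalent to llvm::isUInt<N> for N < 64.
    template <unsigned N> bool isUIntRef(uint64_t x) {
      return x < (uint64_t(1) << N);
    }
    // isUIntRef<18>(0x3ffff) -> true  (largest 18-bit value)
    // isUIntRef<18>(0x40000) -> false, yet the old test accepted it:
    // (0x40000 & ((1 << 19) - 1)) == 0x40000.
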
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index cf71891..0bdd50a 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -18,7 +18,7 @@
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -30,7 +30,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -240,25 +240,6 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-// needsFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool needsFP(const MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool
-SPURegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getStackSize() && needsFP(MF);
-}
-
//--------------------------------------------------------------------------
void
SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -302,7 +283,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MachineOperand &MO = MI.getOperand(OpNo);
// Offset is biased by $lr's slot at the bottom.
- Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize();
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
assert((Offset & 0xf) == 0
&& "16-byte alignment violated in eliminateFrameIndex");
@@ -329,225 +310,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
}
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void
-SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
-{
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
-
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
- assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
- unsigned AlignMask = Align - 1;
-
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
-
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
-
- // Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
- // Update frame info.
- MFI->setStackSize(FrameSize);
-}
-
-void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS)
- const {
- // Mark LR and SP unused, since the prolog spills them to stack and
- // we don't want anyone else to spill them for us.
- //
- // Also, unless R2 is really used someday, don't spill it automatically.
- MF.getRegInfo().setPhysRegUnused(SPU::R0);
- MF.getRegInfo().setPhysRegUnused(SPU::R1);
- MF.getRegInfo().setPhysRegUnused(SPU::R2);
-
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = &SPU::R32CRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
-
-
-}
-
-void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
-{
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Prepare for debug frame info.
- bool hasDebugInfo = MMI.hasDebugInfo();
- MCSymbol *FrameLabel = 0;
-
- // Move MBBI back to the beginning of the function.
- MBBI = MBB.begin();
-
- // Work out frame sizes.
- determineFrameLayout(MF);
- int FrameSize = MFI->getStackSize();
-
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
- if (hasDebugInfo) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
- }
-
- // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
- // for the ABI
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
- .addReg(SPU::R1);
- if (isInt<10>(FrameSize)) {
- // Spill $sp to adjusted $sp
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
- .addReg(SPU::R1);
- // Adjust $sp by required amout
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (isInt<16>(FrameSize)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(-16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
- .addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
-
- if (hasDebugInfo) {
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
- // Show update of SP.
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == SPU::R0) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
- }
-
- // Mark effective beginning of when frame pointer is ready.
- MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
-
- MachineLocation FPDst(SPU::R1);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
- }
- } else {
- // This is a leaf function -- insert a branch hint iff there are
- // sufficient number instructions in the basic block. Note that
- // this is just a best guess based on the basic block's size.
- if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- dl = MBBI->getDebugLoc();
-
- // Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
- .addSym(MMI.getContext().CreateTempSymbol());
- }
- }
-}
-
-void
-SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int FrameSize = MFI->getStackSize();
- int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
- DebugLoc dl = MBBI->getDebugLoc();
-
- assert(MBBI->getOpcode() == SPU::RET &&
- "Can only insert epilog into returning blocks");
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = FrameSize + SPUFrameInfo::minStackSize();
- if (isInt<10>(FrameSize + LinkSlotOffset)) {
- // Reload $lr, adjust $sp by required amount
- // Note: We do this to slightly improve dual issue -- not by much, but it
- // is an opportunity for dual issue.
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(FrameSize + LinkSlotOffset)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
- .addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
- addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
- }
-}
-
unsigned
SPURegisterInfo::getRARegister() const
{
@@ -560,26 +322,16 @@ SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
return SPU::R1;
}
-void
-SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const
-{
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
-
int
SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
// FIXME: Most probably dwarf numbers differs for Linux and Darwin
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
-int
+int
SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
{
- switch(dFormOpcode)
+ switch(dFormOpcode)
{
case SPU::AIr32: return SPU::Ar32;
case SPU::LQDr32: return SPU::LQXr32;
@@ -602,10 +354,10 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
// TODO this is already copied from PPC. Could this convenience function
// be moved to the RegScavenger class?
-unsigned
-SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
RegScavenger *RS,
- const TargetRegisterClass *RC,
+ const TargetRegisterClass *RC,
int SPAdj) const
{
assert(RS && "Register scavenging must be on");
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index aedb769..641da04 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -33,7 +33,7 @@ namespace llvm {
public:
SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
-
+
//! Translate a register's enum value to a register number
/*!
This method translates a register's enum value to it's regiser number,
@@ -56,8 +56,6 @@ namespace llvm {
//! Return the reserved registers
BitVector getReservedRegs(const MachineFunction &MF) const;
- //! Prediate: Target has dedicated frame pointer
- bool hasFP(const MachineFunction &MF) const;
//! Eliminate the call frame setup pseudo-instructions
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -65,21 +63,11 @@ namespace llvm {
//! Convert frame indicies into machine operands
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS = NULL) const;
- //! Determine the frame's layour
- void determineFrameLayout(MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
- //! Emit the function prologue
- void emitPrologue(MachineFunction &MF) const;
- //! Emit the function epilogue
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
//! Get return address register (LR, aka R0)
unsigned getRARegister() const;
//! Get the stack frame register (SP, aka R1)
unsigned getFrameRegister(const MachineFunction &MF) const;
- //! Perform target-specific stack frame setup.
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//------------------------------------------------------------------------
// New methods added:
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
index a0b581f..9cd3c23 100644
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -32,11 +32,12 @@ def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
def ByteOp : InstrItinClass; // EVEN_UNIT
def IntegerOp : InstrItinClass; // EVEN_UNIT
def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
-def RotateShift : InstrItinClass; // EVEN_UNIT
+def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
def ImmLoad : InstrItinClass; // EVEN_UNIT
/* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
@@ -51,7 +52,8 @@ def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<RotateShift , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
]>;
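
This split also feeds the new SPUNopFiller pass: with the itinerary's unit
list [ODD_UNIT, EVEN_UNIT], the per-stage functional-unit bitmask carries
ODD_UNIT in bit 0 and EVEN_UNIT in bit 1, which is exactly what
SPUNopFiller::getOpPlacement decodes (1 -> odd, 2 -> even, 0 -> pseudo).
A one-line restatement of that decoding (illustrative sketch; decodeUnits
is a hypothetical name):

    enum Place { PseudoOp = 0, OddPipe = 1, EvenPipe = 2 };

    // Map an itinerary stage's func-unit mask to a pipeline slot,
    // mirroring SPUNopFiller::getOpPlacement.
    Place decodeUnits(unsigned fuMask) {
      return fuMask == 1 ? OddPipe : fuMask == 2 ? EvenPipe : PseudoOp;
    }
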
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 0f18b7f..07c8352 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -14,6 +14,8 @@
#include "SPUSubtarget.h"
#include "SPU.h"
#include "SPUGenSubtarget.inc"
+#include "llvm/ADT/SmallVector.h"
+#include "SPURegisterInfo.h"
using namespace llvm;
@@ -34,3 +36,22 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
/// producing code for the JIT.
void SPUSubtarget::SetJITMode() {
}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ // CriticalPathRCs is the set of register classes that
+ // anti-dependency breaking is performed for.
+ // Do it for all register classes.
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&SPU::R8CRegClass);
+ CriticalPathRCs.push_back(&SPU::R16CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+ CriticalPathRCs.push_back(&SPU::R64CRegClass);
+ CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index 88201c6..d7929302 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -81,9 +81,13 @@ namespace llvm {
/// properties of this subtarget.
const char *getTargetDataString() const {
return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
- "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
"-s:128:128-n32:64";
}
+
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 480ec3f..3ed7361 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -29,7 +29,7 @@ extern "C" void LLVMInitializeCellSPUTarget() {
}
const std::pair<unsigned, int> *
-SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
NumEntries = 1;
return &LR[0];
}
@@ -40,7 +40,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
Subtarget(TT, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
- FrameInfo(*this),
+ FrameLowering(Subtarget),
TLInfo(*this),
TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
@@ -59,3 +59,12 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
PM.add(createSPUISelDag(*this));
return false;
}
+
+// passes to run just before printing the assembly
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ // Align instructions with nops/lnops for dual issue.
+ PM.add(createSPUNopFillerPass(*this));
+ return true;
+}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 7e02701..75abd5e 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -18,14 +18,14 @@
#include "SPUInstrInfo.h"
#include "SPUISelLowering.h"
#include "SPUSelectionDAGInfo.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
namespace llvm {
class PassManager;
class GlobalValue;
-class TargetFrameInfo;
+class TargetFrameLowering;
/// SPUTargetMachine
///
@@ -33,7 +33,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUSubtarget Subtarget;
const TargetData DataLayout;
SPUInstrInfo InstrInfo;
- SPUFrameInfo FrameInfo;
+ SPUFrameLowering FrameLowering;
SPUTargetLowering TLInfo;
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
@@ -48,8 +48,8 @@ public:
virtual const SPUInstrInfo *getInstrInfo() const {
return &InstrInfo;
}
- virtual const SPUFrameInfo *getFrameInfo() const {
- return &FrameInfo;
+ virtual const SPUFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
}
/*!
\note Cell SPU does not support JIT today. It could support JIT at some
@@ -75,13 +75,14 @@ public:
return &DataLayout;
}
- virtual const InstrItineraryData getInstrItineraryData() const {
- return InstrItins;
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
}
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
};
} // end namespace llvm