summary refs log tree commit diff stats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r-- lib/Target/PowerPC/CMakeLists.txt 1
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp 23
-rw-r--r-- lib/Target/PowerPC/PPCAsmPrinter.cpp 50
-rw-r--r-- lib/Target/PowerPC/PPCCTRLoops.cpp 5
-rw-r--r-- lib/Target/PowerPC/PPCCallingConv.td 3
-rw-r--r-- lib/Target/PowerPC/PPCFastISel.cpp 37
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.cpp 20
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.h 4
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp 43
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp 260
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h 29
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp 33
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h 4
-rw-r--r-- lib/Target/PowerPC/PPCInstrVSX.td 15
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp 28
-rw-r--r-- lib/Target/PowerPC/PPCScheduleP7.td 4
-rw-r--r-- lib/Target/PowerPC/PPCScheduleP8.td 4
-rw-r--r-- lib/Target/PowerPC/PPCSelectionDAGInfo.cpp 22
-rw-r--r-- lib/Target/PowerPC/PPCSelectionDAGInfo.h 31
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.cpp 2
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.h 6
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp 21
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.h 2
-rw-r--r-- lib/Target/PowerPC/PPCTargetTransformInfo.cpp 2
-rw-r--r-- lib/Target/PowerPC/PPCTargetTransformInfo.h 15
-rw-r--r-- lib/Target/PowerPC/PPCVSXFMAMutate.cpp 10
-rw-r--r-- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp 209
27 files changed, 586 insertions, 297 deletions
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index fe168a5..c0c83cc 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -33,7 +33,6 @@ add_llvm_target(PowerPCCodeGen
PPCTargetObjectFile.cpp
PPCTargetTransformInfo.cpp
PPCTOCRegDeps.cpp
- PPCSelectionDAGInfo.cpp
PPCTLSDynamicCall.cpp
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5c38fe1..30f232a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -51,10 +51,9 @@ static MCInstrInfo *createPPCMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
- Triple TheTriple(TT);
- bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
- TheTriple.getArch() == Triple::ppc64le);
+static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) {
+ bool isPPC64 =
+ (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
unsigned Flavour = isPPC64 ? 0 : 1;
unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
@@ -65,9 +64,7 @@ static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitPPCMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createPPCMCSubtargetInfoImpl(TT, CPU, FS);
}
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
@@ -90,22 +87,20 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
- Triple T(TT);
- if (T.isOSDarwin())
+ if (TT.isOSDarwin())
RM = Reloc::DynamicNoPIC;
else
RM = Reloc::Static;
}
if (CM == CodeModel::Default) {
- Triple T(TT);
- if (!T.isOSDarwin() &&
- (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
+ if (!TT.isOSDarwin() &&
+ (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
CM = CodeModel::Medium;
}
X->initMCCodeGenInfo(RM, CM, OL);
@@ -231,7 +226,7 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
static MCTargetStreamer *
createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
- if (TT.getObjectFormat() == Triple::ELF)
+ if (TT.isOSBinFormatELF())
return new PPCTargetELFStreamer(S);
return new PPCTargetMachOStreamer(S);
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 87a5236..199a0de 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -197,7 +197,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// External or weakly linked global variables need non-lazily-resolved stubs
if (TM.getRelocationModel() != Reloc::Static &&
- (GV->isDeclaration() || GV->isWeakForLinker())) {
+ !GV->isStrongDefinitionForLinker()) {
if (!GV->hasHiddenVisibility()) {
SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -369,28 +369,70 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
- EncodedBytes = 6*4;
+ EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
+ // Save the current TOC pointer before the remote call.
+ int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
+ .addReg(PPC::X2)
+ .addImm(TOCSaveOffset)
+ .addReg(PPC::X1));
+ ++EncodedBytes;
+
+
+ // If we're on ELFv1, then we need to load the actual function pointer from
+ // the function descriptor.
+ if (!Subtarget->isELFv2ABI()) {
+ // Load the new TOC pointer and the function address, but not r11
+ // (needing this is rare, and loading it here would prevent passing it
+ // via a 'nest' parameter).
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addImm(8)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(ScratchReg)
+ .addImm(0)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ }
+
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+ ++EncodedBytes;
+
+ // Restore the TOC pointer after the call.
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addImm(TOCSaveOffset)
+ .addReg(PPC::X1));
+ ++EncodedBytes;
}
+ // Each instruction is 4 bytes.
+ EncodedBytes *= 4;
+
// Emit padding.
unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
@@ -624,7 +666,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
IsExternal = GV->isDeclaration();
IsCommon = GV->hasCommonLinkage();
IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- (GV->isDeclaration() || GV->isWeakForLinker());
+ !GV->isStrongDefinitionForLinker();
IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
@@ -706,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- (GV->isDeclaration() || GV->isWeakForLinker());
+ !GV->isStrongDefinitionForLinker();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4161317..baadf08 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -351,8 +351,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
Opcode = ISD::FTRUNC; break;
}
- MVT VTy =
- TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ auto &DL = CI->getModule()->getDataLayout();
+ MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
+ true);
if (VTy == MVT::Other)
return true;
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 874a6fc..5bc9124 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -133,6 +133,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// register having an odd register number.
CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index fafcd76..5f236f7 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -262,7 +262,7 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
- EVT Evt = TLI.getValueType(Ty, true);
+ EVT Evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (Evt == MVT::Other || !Evt.isSimple()) return false;
@@ -324,12 +324,13 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
return PPCComputeAddress(U->getOperand(0), Addr);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::GetElementPtr: {
@@ -799,7 +800,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
bool IsZExt, unsigned DestReg) {
Type *Ty = SrcValue1->getType();
- EVT SrcEVT = TLI.getValueType(Ty, true);
+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
@@ -893,8 +894,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
// Attempt to fast-select a floating-point extend instruction.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
@@ -911,8 +912,8 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
// Attempt to fast-select a floating-point truncate instruction.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
@@ -992,7 +993,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
if (!SrcEVT.isSimple())
return false;
@@ -1157,7 +1158,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
// We can get here in the case when we have a binary operation on a non-legal
// type and the target independent selector doesn't know how to handle it.
@@ -1594,7 +1595,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -1641,7 +1642,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVEVT = TLI.getValueType(RV->getType());
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple())
return false;
MVT RVVT = RVEVT.getSimpleVT();
@@ -1769,8 +1770,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
return false;
@@ -1806,8 +1807,8 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
- SrcEVT = TLI.getValueType(SrcTy, true);
- DestEVT = TLI.getValueType(DestTy, true);
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
@@ -1979,7 +1980,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// on the "if" path here.
if (CModel == CodeModel::Large ||
(GV->getType()->getElementType()->isFunctionTy() &&
- (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ !GV->isStrongDefinitionForLinker()) ||
GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage())
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
@@ -2127,7 +2128,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple()) return 0;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b4008e4..87229d8 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
for (unsigned i = 0; i != 32; ++i)
- if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ if (MRI.isPhysRegModified(VRRegNo[i]))
UsedRegMask |= 1 << (31-i);
// Live in and live out values already must be in the mask, so don't bother
@@ -1158,9 +1159,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void
-PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *) const {
+void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
const PPCRegisterInfo *RegInfo =
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
@@ -1168,8 +1171,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.setPhysRegUnused(LR);
+ SavedRegs.reset(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -1214,9 +1216,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
// function uses CR 2, 3, or 4.
if (!isPPC64 && !isDarwinABI &&
- (MRI.isPhysRegUsed(PPC::CR2) ||
- MRI.isPhysRegUsed(PPC::CR3) ||
- MRI.isPhysRegUsed(PPC::CR4))) {
+ (SavedRegs.test(PPC::CR2) ||
+ SavedRegs.test(PPC::CR3) ||
+ SavedRegs.test(PPC::CR4))) {
int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 28d074e..d6a389b 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -45,8 +45,8 @@ public:
bool needsFP(const MachineFunction &MF) const;
void replaceFPWithRealFP(MachineFunction &MF) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index c85c261..01a3acb 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -102,7 +102,8 @@ namespace {
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
- return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy());
+ return CurDAG->getTargetConstant(
+ Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
}
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
@@ -313,7 +314,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
const Module *M = MF->getFunction()->getParent();
DebugLoc dl;
- if (PPCLowering->getPointerTy() == MVT::i32) {
+ if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
if (PPCSubTarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
if (M->getPICLevel() == PICLevel::Small) {
@@ -342,7 +343,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
}
return CurDAG->getRegister(GlobalBaseReg,
- PPCLowering->getPointerTy()).getNode();
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()))
+ .getNode();
}
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
@@ -2205,7 +2207,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT =
+ CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
if (!PPCSubTarget->useCRBits() &&
@@ -2468,10 +2471,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops));
+ return transferMemOperands(
+ N, CurDAG->getMachineNode(
+ Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
+ Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -2506,10 +2510,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
- return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops));
+ return transferMemOperands(
+ N, CurDAG->getMachineNode(
+ Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
+ Ops));
}
}
@@ -2662,7 +2667,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
- EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT =
+ CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
// If this is a select of i1 operands, we'll pattern match it.
@@ -2901,7 +2907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
if ((GValue->getType()->getElementType()->isFunctionTy() &&
- (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ !GValue->isStrongDefinitionForLinker()) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
@@ -2915,7 +2921,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Generate a PIC-safe GOT reference.
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
- return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32);
+ return CurDAG->SelectNodeTo(
+ N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()),
+ MVT::i32);
}
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
@@ -3398,9 +3406,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
bool IsModified;
do {
IsModified = false;
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ for (SDNode &Node : CurDAG->allnodes()) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
if (!MachineNode || MachineNode->use_empty())
continue;
SDNode *ResNode = MachineNode;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 594472b..0ed9b05 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -952,7 +952,8 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
// Darwin passes everything on 4 byte boundary.
if (Subtarget.isDarwin())
return 4;
@@ -1055,7 +1056,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
-EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
+ EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
@@ -1101,7 +1103,7 @@ static bool isConstantOrUndef(int Op, int Val) {
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -1132,7 +1134,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -1174,7 +1176,7 @@ bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
if (!Subtarget.hasP8Vector())
return false;
- bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -1237,7 +1239,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
@@ -1262,7 +1264,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
@@ -1352,7 +1354,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
unsigned indexOffset = CheckEven ? 4 : 0;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
@@ -1399,7 +1401,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
- bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
@@ -1456,7 +1458,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (DAG.getDataLayout().isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1796,7 +1798,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
}
- Disp = DAG.getTargetConstant(0, dl, getPointerTy());
+ Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
@@ -2084,7 +2086,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool is64bit = Subtarget.isPPC64();
const Module *M = DAG.getMachineFunction().getFunction()->getParent();
PICLevel::Level picLevel = M->getPICLevel();
@@ -2270,7 +2272,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
@@ -2399,11 +2401,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl(Op);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
- Type *IntPtrTy =
- DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
- *DAG.getContext());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2440,7 +2440,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
@@ -2476,8 +2476,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
PtrVT);
@@ -2797,7 +2796,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
@@ -3023,7 +3022,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
assert(!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions");
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
@@ -3059,12 +3058,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned NumBytes = LinkageSize;
unsigned AvailableFPRs = Num_FPR_Regs;
unsigned AvailableVRs = Num_VR_Regs;
- for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (Ins[i].Flags.isNest())
+ continue;
+
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
NumBytes, AvailableFPRs, AvailableVRs,
Subtarget.hasQPX()))
HasParameterArea = true;
+ }
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
@@ -3216,6 +3219,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ if (Flags.isNest()) {
+ // The 'nest' parameter, if any, is passed in R11.
+ unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ break;
+ }
+
// These can be scalar arguments or elements of an integer array type
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
@@ -3425,7 +3439,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -3845,7 +3859,8 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
return nullptr; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
- DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+ DAG.getTargetLoweringInfo().getPointerTy(
+ DAG.getDataLayout())).getNode();
}
namespace {
@@ -3991,7 +4006,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
SDLoc dl) {
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
if (!isTailCall) {
if (isVector) {
SDValue StackPtr;
@@ -4053,7 +4068,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
- bool isTailCall, bool IsPatchPoint,
+ bool isTailCall, bool IsPatchPoint, bool hasNest,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
@@ -4062,7 +4077,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
@@ -4084,8 +4099,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
(Subtarget.getTargetTriple().isMacOSX() &&
Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
- (G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) ||
+ !G->getGlobal()->isStrongDefinitionForLinker()) ||
(Subtarget.isTargetELF() && !isPPC64 &&
!G->getGlobal()->hasLocalLinkage() &&
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
@@ -4196,11 +4210,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Chain = TOCVal.getValue(0);
InFlag = TOCVal.getValue(1);
- SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
- InFlag);
+ // If the function call has an explicit 'nest' parameter, it takes the
+ // place of the environment pointer.
+ if (!hasNest) {
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
- Chain = EnvVal.getValue(0);
- InFlag = EnvVal.getValue(1);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+ }
MTCTROps[0] = Chain;
MTCTROps[1] = LoadFuncPtr;
@@ -4218,7 +4236,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64 && !isELFv2ABI)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -4254,8 +4272,7 @@ static
bool isLocalCall(const SDValue &Callee)
{
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return !G->getGlobal()->isDeclaration() &&
- !G->getGlobal()->isWeakForLinker();
+ return G->getGlobal()->isStrongDefinitionForLinker();
return false;
}
@@ -4308,7 +4325,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
bool isTailCall, bool isVarArg, bool IsPatchPoint,
- SelectionDAG &DAG,
+ bool hasNest, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
SDValue InFlag, SDValue Chain,
@@ -4321,8 +4338,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
- SPDiff, isTailCall, IsPatchPoint, RegsToPass,
- Ops, NodeTys, CS, Subtarget);
+ SPDiff, isTailCall, IsPatchPoint, hasNest,
+ RegsToPass, Ops, NodeTys, CS, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -4381,7 +4398,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// allocated and an unnecessary move instruction being generated.
CallOpc = PPCISD::BCTRL_LOAD_TOC;
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
@@ -4586,7 +4603,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
unsigned LocMemOffset = ByValVA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
+ StackPtr, PtrOff);
// Create a copy of the argument in the local area of the current
// stack frame.
@@ -4623,7 +4641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
if (!isTailCall) {
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
+ StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
@@ -4664,7 +4683,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
NumBytes, Ins, InVals, CS);
}
@@ -4703,8 +4723,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
+ bool hasNest = false;
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4758,6 +4779,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
+ if (Flags.isNest())
+ continue;
+
if (CallConv == CallingConv::Fast) {
if (Flags.isByVal())
NumGPRsUsed += (Flags.getByValSize()+7)/8;
@@ -5021,6 +5045,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ if (Flags.isNest()) {
+ // The 'nest' parameter, if any, is passed in R11.
+ RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
+ hasNest = true;
+ break;
+ }
+
// These can be scalar arguments or elements of an integer array type
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
@@ -5302,9 +5333,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CS);
}
SDValue
@@ -5320,7 +5351,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned NumOps = Outs.size();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
unsigned PtrByteSize = isPPC64 ? 8 : 4;
@@ -5693,7 +5724,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
NumBytes, Ins, InVals, CS);
}
@@ -5764,7 +5796,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
// Get the corect type for pointers.
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
// Construct the stack pointer operand.
bool isPPC64 = Subtarget.isPPC64();
@@ -5794,7 +5826,7 @@ SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
@@ -5817,7 +5849,7 @@ SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
@@ -5845,7 +5877,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDLoc dl(Op);
// Get the corect type for pointers.
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
DAG.getConstant(0, dl, PtrVT), Size);
@@ -5888,8 +5920,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue BasePtr = LD->getBasePtr();
MachineMemOperand *MMO = LD->getMemOperand();
- SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
- BasePtr, MVT::i8, MMO);
+ SDValue NewLD =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
+ BasePtr, MVT::i8, MMO);
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
@@ -5913,7 +5946,8 @@ SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = ST->getValue();
MachineMemOperand *MMO = ST->getMemOperand();
- Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
+ Value);
return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
@@ -6374,7 +6408,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
SINT.getOperand(0).getValueType() == MVT::i32) {
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -6419,7 +6453,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
SDValue Ld;
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
@@ -6506,7 +6540,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Save FP Control Word to register
EVT NodeTys[] = {
@@ -6727,7 +6761,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
assert(BVN->getNumOperands() == 4 &&
@@ -6760,9 +6794,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
- 16 /* alignment */);
-
+ SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
+ 16 /* alignment */);
+
SmallVector<SDValue, 2> Ops;
Ops.push_back(DAG.getEntryNode());
Ops.push_back(CPIdx);
@@ -7453,7 +7487,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
@@ -7499,7 +7533,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue StoreChain = DAG.getEntryNode();
@@ -7651,9 +7685,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Ex =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
- DAG.getConstant(Idx, dl, getVectorIdxTy()));
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
SDValue Store;
if (ScalarVT != ScalarMemVT)
Store =
@@ -7715,7 +7749,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SmallVector<SDValue, 2> Ops;
@@ -7920,7 +7954,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i1 &&
"Unexpected result type for CTR decrement intrinsic");
- EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+ EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ N->getValueType(0));
SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
N->getOperand(1));
@@ -8248,7 +8283,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
@@ -8386,7 +8421,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -9032,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+static std::string getRecipOp(const char *Base, EVT VT) { // Returns Base plus a type suffix, with a "vec-" prefix for vector VT.
+ std::string RecipOp(Base);
+ if (VT.getScalarType() == MVT::f64)
+ RecipOp += "d"; // f64 scalar (or element) type
+ else
+ RecipOp += "f"; // every other scalar type takes the "f" suffix
+
+ if (VT.isVector())
+ RecipOp = "vec-" + RecipOp; // e.g. Base "sqrt" with a non-f64 vector VT gives "vec-sqrtf"
+
+ return RecipOp;
+}
+
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
@@ -9043,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- // Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. For both FRE and FRSQRTE, the minimum
- // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
- // 2^-14. IEEE float has 23 digits and double has 52 digits.
- RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
- if (VT.getScalarType() == MVT::f64)
- ++RefinementSteps;
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("sqrt", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
@@ -9066,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- // Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. For both FRE and FRSQRTE, the minimum
- // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
- // 2^-14. IEEE float has 23 digits and double has 52 digits.
- RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
- if (VT.getScalarType() == MVT::f64)
- ++RefinementSteps;
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("div", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
@@ -9854,7 +9900,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
"Invalid extension type");
- EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
SDValue ShiftCst =
DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
@@ -10145,9 +10191,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
- unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+ unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
@@ -10219,7 +10265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
2*MemVT.getStoreSize()-1);
// Create the new base load.
- SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy());
+ SDValue LDXIntID =
+ DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue BaseLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
@@ -10243,7 +10290,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (!findConsecutiveLoad(LD, DAG))
--IncValue;
- SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy());
+ SDValue Increment =
+ DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
MachineMemOperand *ExtraMMO =
@@ -10691,7 +10739,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
-PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+PPCTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -10776,7 +10824,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
@@ -10923,8 +10971,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
-bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// PPC does not allow r+i addressing modes for vectors!
if (Ty->isVectorTy() && AM.BaseOffs != 0)
@@ -10977,22 +11025,22 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = Subtarget.isPPC64();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
isPPC64 ? MVT::i64 : MVT::i32);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, dl, getPointerTy(),
- FrameAddr, Offset),
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
+ MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -11000,13 +11048,13 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- bool isPPC64 = PtrVT == MVT::i64;
-
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+
// Naked functions never have a frame pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned FrameReg;
@@ -11026,8 +11074,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 02242b5..6e13533 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -423,7 +423,9 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
bool isCheapToSpeculateCttz() const override {
return true;
@@ -434,7 +436,8 @@ namespace llvm {
}
/// getSetCCResultType - Return the ISD::SETCC ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
/// Return true if target always beneficiates from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
@@ -487,7 +490,8 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
@@ -519,8 +523,7 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -529,13 +532,13 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(Type *Ty) const override;
+ unsigned getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
@@ -544,8 +547,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "es")
return InlineAsm::Constraint_es;
else if (ConstraintCode == "o")
@@ -561,8 +564,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -745,7 +748,7 @@ namespace llvm {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
- bool isVarArg, bool IsPatchPoint,
+ bool isVarArg, bool IsPatchPoint, bool hasNest,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 696a838..bf6e402 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
cl::Hidden);
+static cl::opt<bool>
+UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, // when set, PPCInstrInfo::getInstrLatency defers to PPCGenInstrInfo::getInstrLatency
+ cl::desc("Use the old (incorrect) instruction latency calculation"));
+
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return new ScoreboardHazardRecognizer(II, DAG);
}
+unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const { // PredCost is only forwarded to the fallback path below
+ if (!ItinData || UseOldLatencyCalc) // no itinerary data, or the old calculation was requested
+ return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
+
+ // The default implementation of getInstrLatency calls getStageLatency, but
+ // getStageLatency does not do the right thing for us. While we have
+ // itineraries, most cores are fully pipelined, and so the itineraries only
+ // express the first part of the pipeline, not every stage. Instead, we use
+ // the listed operand cycle of the outputs: the largest cycle over all
+ // explicit register defs, and never less than 1.
+
+ unsigned Latency = 1;
+ unsigned DefClass = MI->getDesc().getSchedClass();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) // only explicit register definitions count
+ continue;
+
+ int Cycle = ItinData->getOperandCycle(DefClass, i);
+ if (Cycle < 0) // presumably "no cycle listed for this operand" -- confirm against InstrItineraryData::getOperandCycle
+ continue;
+
+ Latency = std::max(Latency, (unsigned) Cycle);
+ }
+
+ return Latency;
+}
int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index e2d6346..40badae 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -95,6 +95,10 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const override;
+
int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 43ba499..20c95fe 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -989,6 +989,18 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
(XVDIVDP $A, $B)>;
+// Reciprocal estimate
+def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
+ (XVRESP $A)>;
+def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
+ (XVREDP $A)>;
+
+// Recip. square root estimate
+def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
+ (XVRSQRTESP $A)>;
+def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
+ (XVRSQRTEDP $A)>;
+
} // AddedComplexity
} // HasVSX
@@ -1013,6 +1025,9 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
v4i32:$XB)))]>;
} // isCommutable
+ def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+ (XXLEQV $A, $B)>;
+
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 656376c..2b09b2f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -165,8 +165,7 @@ void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -209,7 +208,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- if (PPCFI->needsFP(MF))
+ if (TFI->needsFP(MF))
Reserved.set(PPC::X31);
if (hasBasePointer(MF))
@@ -230,7 +229,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- if (PPCFI->needsFP(MF))
+ if (TFI->needsFP(MF))
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
@@ -256,8 +255,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -341,7 +339,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ unsigned TargetAlign = TFI->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -864,8 +863,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
@@ -908,10 +906,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
@@ -946,11 +944,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
- unsigned StackEst =
- PPCFI->determineFrameLayout(MF, false, true);
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
// If we likely don't need a stack frame, then we probably don't need a
// virtual base register either.
@@ -1034,4 +1029,3 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
MI->getOpcode() == TargetOpcode::PATCHPOINT ||
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}
-
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
index 635d154..267f567 100644
--- a/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_VS1, P7_VS2]>],
[5, 1, 1]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_VS1, P7_VS2]>],
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
index 020739b..69e6d05 100644
--- a/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FPU1, P8_FPU2]>],
[5, 1, 1]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FPU1, P8_FPU2]>],
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
deleted file mode 100644
index dc16742..0000000
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PPCSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCTargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "powerpc-selectiondag-info"
-
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
- : TargetSelectionDAGInfo(DL) {}
-
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
deleted file mode 100644
index 2c1378d..0000000
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
-#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class PPCTargetMachine;
-
-class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit PPCSelectionDAGInfo(const DataLayout *DL);
- ~PPCSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index cf603fe..58dacca 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -53,7 +53,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le),
TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
+ InstrInfo(*this), TLInfo(TM, *this) {}
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e9cc3d4..0616c1f 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -17,10 +17,10 @@
#include "PPCFrameLowering.h"
#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -129,7 +129,7 @@ protected:
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
PPCTargetLowering TLInfo;
- PPCSelectionDAGInfo TSInfo;
+ TargetSelectionDAGInfo TSInfo;
public:
/// This constructor initializes the data members to match that
@@ -164,7 +164,7 @@ public:
const PPCTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const PPCSelectionDAGInfo *getSelectionDAGInfo() const override {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
const PPCRegisterInfo *getRegisterInfo() const override {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 074bc87..1daf244 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(getTargetTriple())),
- TargetABI(computeTargetABI(TT, Options)) {
+ TargetABI(computeTargetABI(TT, Options)),
+ Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
+
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of correct bits after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 fraction bits and double has 52.
+ unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+ RefinementSteps64 = RefinementSteps + 1;
+
+ this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
+
+ this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
+
initAsmInfo();
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 5c0f7e6..6496339 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,6 +29,8 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
+ PPCSubtarget Subtarget;
+
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 25d563a..e21c2b7 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -317,7 +317,7 @@ unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 35e7a14..368bef9 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -38,7 +38,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
PPCTTIImpl(const PPCTTIImpl &Arg)
@@ -46,18 +47,6 @@ public:
PPCTTIImpl(PPCTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- PPCTTIImpl &operator=(const PPCTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- PPCTTIImpl &operator=(PPCTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index f352fa6..58d3c3d 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -136,6 +136,16 @@ protected:
// source of the copy, it must still be live here. We can't use
// interval testing for a physical register, so as long as we're
// walking the MIs we may as well test liveness here.
+ //
+ // FIXME: There is a case that occurs in practice, like this:
+ // %vreg9<def> = COPY %F1; VSSRC:%vreg9
+ // ...
+ // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9
+ // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9
+ // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC:
+ // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC:
+ // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC:
+ // which prevents an otherwise-profitable transformation.
bool OtherUsers = false, KillsAddendSrc = false;
for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
J != JE; --J) {
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index e7ab71a..3fb1dcc 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -80,6 +80,7 @@ struct PPCVSXSwapEntry {
unsigned int IsSwap : 1;
unsigned int MentionsPhysVR : 1;
unsigned int IsSwappable : 1;
+ unsigned int MentionsPartialVR : 1;
unsigned int SpecialHandling : 3;
unsigned int WebRejected : 1;
unsigned int WillRemove : 1;
@@ -91,7 +92,9 @@ enum SHValues {
SH_INSERT,
SH_NOSWAP_LD,
SH_NOSWAP_ST,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_COPYSCALAR
};
struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -167,6 +170,21 @@ private:
isRegInClass(Reg, &PPC::VRRCRegClass));
}
+ // Return true iff Reg is a partial vector register (class VSFRC or VSSRC).
+ bool isScalarVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSFRCRegClass) ||
+ isRegInClass(Reg, &PPC::VSSRCRegClass));
+ }
+
+ // Return true iff Reg is a full (isVecReg) or scalar (isScalarVecReg) vector
+ // register. Partial is set to true for the scalar case and is otherwise left
+ // untouched (never cleared), so callers must initialize it to false.
+ bool isAnyVecReg(unsigned Reg, bool &Partial) {
+ if (isScalarVecReg(Reg))
+ Partial = true;
+ return isScalarVecReg(Reg) || isVecReg(Reg);
+ }
+
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -223,12 +241,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
for (MachineInstr &MI : MBB) {
bool RelevantInstr = false;
+ bool Partial = false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (isVecReg(Reg)) {
+ if (isAnyVecReg(Reg, Partial)) {
RelevantInstr = true;
break;
}
@@ -250,8 +269,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// Unless noted otherwise, an instruction is considered
// safe for the optimization. There are a large number of
// such true-SIMD instructions (all vector math, logical,
- // select, compare, etc.).
- SwapVector[VecIdx].IsSwappable = 1;
+ // select, compare, etc.). However, if the instruction
+ // mentions a partial vector register and does not have
+ // special handling defined, it is not swappable.
+ if (Partial)
+ SwapVector[VecIdx].MentionsPartialVR = 1;
+ else
+ SwapVector[VecIdx].IsSwappable = 1;
break;
case PPC::XXPERMDI: {
// This is a swap if it is of the form XXPERMDI t, s, s, 2.
@@ -269,25 +293,37 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
VecIdx);
if (trueReg1 == trueReg2)
SwapVector[VecIdx].IsSwap = 1;
- }
+ else {
+ // We can still handle these if the two registers are not
+ // identical, by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+ }
// This is a doubleword splat if it is of the form
// XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we
// must look through chains of copy-likes to find the source
// register. We turn off the marking for mention of a physical
// register, because splatting it is safe; the optimization
- // will not swap the value in the physical register.
- else if (immed == 0 || immed == 3) {
+ // will not swap the value in the physical register. Whether
+ // or not the two input registers are identical, we can handle
+ // these by adjusting the form of the XXPERMDI.
+ } else if (immed == 0 || immed == 3) {
+
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+
unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
VecIdx);
unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
VecIdx);
- if (trueReg1 == trueReg2) {
- SwapVector[VecIdx].IsSwappable = 1;
+ if (trueReg1 == trueReg2)
SwapVector[VecIdx].MentionsPhysVR = 0;
- }
+
+ } else {
+ // We can still handle these by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
}
- // Any other form of XXPERMDI is lane-sensitive and unsafe
- // for the optimization.
break;
}
case PPC::LVX:
@@ -324,7 +360,32 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (isVecReg(MI.getOperand(0).getReg()) &&
isVecReg(MI.getOperand(1).getReg()))
SwapVector[VecIdx].IsSwappable = 1;
+ // If we have a copy from one scalar floating-point register
+ // to another, we can accept this even if it is a physical
+ // register. The only way this gets involved is if it feeds
+ // a SUBREG_TO_REG, which is handled by introducing a swap.
+ else if (isScalarVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::SUBREG_TO_REG: {
+ // These are fine provided they are moving between full vector
+ // register classes. If they are moving from a scalar
+ // floating-point class to a vector class, we can handle those
+ // as well, provided we introduce a swap. It is generally the
+ // case that we will introduce fewer swaps than we remove, but
+ // (FIXME) a cost model could be used. However, introduced
+ // swaps could potentially be CSEd, so this is not trivial.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(2).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ else if (isVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(2).getReg())) {
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+ }
break;
+ }
case PPC::VSPLTB:
case PPC::VSPLTH:
case PPC::VSPLTW:
@@ -425,6 +486,10 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::VUPKLSW:
case PPC::XXMRGHW:
case PPC::XXMRGLW:
+ // XXSLDWI could be replaced by a general permute with one of three
+ // permute control vectors (for shift values 1, 2, 3). However,
+ // VPERM has a more restrictive register class.
+ case PPC::XXSLDWI:
case PPC::XXSPLTW:
break;
}
@@ -501,18 +566,20 @@ void PPCVSXSwapRemoval::formWebs() {
DEBUG(MI->dump());
// It's sufficient to walk vector uses and join them to their unique
- // definitions. In addition, check *all* vector register operands
- // for physical regs.
+ // definitions. In addition, check full vector register operands
+ // for physical regs. We exclude partial-vector register operands
+ // because we can handle them if copied to a full vector.
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!isVecReg(Reg))
+ if (!isVecReg(Reg) && !isScalarVecReg(Reg))
continue;
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- SwapVector[EntryIdx].MentionsPhysVR = 1;
+ if (!(MI->isCopy() && isScalarVecReg(Reg)))
+ SwapVector[EntryIdx].MentionsPhysVR = 1;
continue;
}
@@ -545,15 +612,21 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
- // Reject webs containing mentions of physical registers, or containing
- // operations that we don't know how to handle in a lane-permuted region.
+ // If representative is already rejected, don't waste further time.
+ if (SwapVector[Repr].WebRejected)
+ continue;
+
+ // Reject webs containing mentions of physical or partial registers, or
+ // containing operations that we don't know how to handle in a lane-
+ // permuted region.
if (SwapVector[EntryIdx].MentionsPhysVR ||
+ SwapVector[EntryIdx].MentionsPartialVR ||
!(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
- format("Web %d rejected for physreg, subreg, or not swap[pable]\n",
+ format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
Repr));
DEBUG(dbgs() << " in " << EntryIdx << ": ");
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
@@ -588,7 +661,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
}
}
- // Reject webs than contain swapping stores that are fed by something
+ // Reject webs that contain swapping stores that are fed by something
// other than a swap instruction.
} else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
@@ -670,7 +743,8 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
// The identified swap entry requires special handling to allow its
// containing computation to be optimized. Perform that handling
// here.
-// FIXME: This code is to be phased in with subsequent patches.
+// FIXME: Additional opportunities will be phased in with subsequent
+// patches.
void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (SwapVector[EntryIdx].SpecialHandling) {
@@ -704,6 +778,91 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
break;
}
+ // For an XXPERMDI that isn't handled otherwise, we need to
+ // reverse the order of the operands. If the selector operand
+ // has a value of 0 or 3, we need to change it to 3 or 0,
+ // respectively. Otherwise we should leave it alone. (This
+ // is equivalent to reversing the two bits of the selector
+ // operand and complementing the result.)
+ case SHValues::SH_XXPERMDI: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing XXPERMDI: ");
+ DEBUG(MI->dump());
+
+ unsigned Selector = MI->getOperand(3).getImm();
+ if (Selector == 0 || Selector == 3)
+ Selector = 3 - Selector;
+ MI->getOperand(3).setImm(Selector);
+
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ // For a copy from a scalar floating-point register to a vector
+ // register, removing swaps will leave the copied value in the
+ // wrong lane. Insert a swap following the copy to fix this.
+ case SHValues::SH_COPYSCALAR: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
+ DEBUG(MI->dump());
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+
+ MI->getOperand(0).setReg(NewVReg);
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+
+ MachineBasicBlock::iterator InsertPoint = MI->getNextNode();
+
+ // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
+ // is copying to a VRRC, we need to be careful to avoid a register
+ // assignment problem. In this case we must copy from VRRC to VSRC
+ // prior to the swap, and from VSRC to VRRC following the swap.
+ // Coalescing will usually remove all this mess.
+
+ if (DstRC == &PPC::VRRCRegClass) {
+ unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), VSRCTmp1)
+ .addReg(NewVReg);
+ DEBUG(MI->getNextNode()->dump());
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), VSRCTmp2)
+ .addReg(VSRCTmp1)
+ .addReg(VSRCTmp1)
+ .addImm(2);
+ DEBUG(MI->getNextNode()->getNextNode()->dump());
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), DstReg)
+ .addReg(VSRCTmp2);
+ DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
+
+ } else {
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(NewVReg)
+ .addReg(NewVReg)
+ .addImm(2);
+
+ DEBUG(MI->getNextNode()->dump());
+ }
+ break;
+ }
}
}
@@ -756,6 +915,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << "swap ");
if (SwapVector[EntryIdx].MentionsPhysVR)
DEBUG(dbgs() << "physreg ");
+ if (SwapVector[EntryIdx].MentionsPartialVR)
+ DEBUG(dbgs() << "partialreg ");
if (SwapVector[EntryIdx].IsSwappable) {
DEBUG(dbgs() << "swappable ");
@@ -780,6 +941,12 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
case SH_SPLAT:
DEBUG(dbgs() << "special:splat ");
break;
+ case SH_XXPERMDI:
+ DEBUG(dbgs() << "special:xxpermdi ");
+ break;
+ case SH_COPYSCALAR:
+ DEBUG(dbgs() << "special:copyscalar ");
+ break;
}
}
OpenPOWER on IntegriCloud