Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 955
1 file changed, 743 insertions(+), 212 deletions(-)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9089c6a..2b9195b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -216,11 +217,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
- // PowerPC does not have BSWAP, CTPOP or CTTZ
+ // PowerPC does not have BSWAP
+ // CTPOP and CTTZ were introduced in P8/P9, respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ }
if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
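
A rough illustration of what this legality choice means (a sketch, not the legalizer's exact expansion): on ISA 3.0 (Power9) count-trailing-zeros can select a single cnttzw/cnttzd, while older subtargets get an expanded sequence built from simpler operations, along these lines:

    #include <cstdint>

    // Portable count-trailing-zeros mirroring one common Expand strategy:
    // isolate the lowest set bit with (x & -x), then locate it with a
    // count-leading-zeros. __builtin_clz is a GCC/Clang builtin used only
    // to keep the sketch short; it is undefined for 0, hence the guard.
    uint32_t cttz32(uint32_t x) {
      if (x == 0)
        return 32;                       // the hardware cnttzw also yields 32 for 0
      return 31 - __builtin_clz(x & -x); // position of the lowest set bit
    }
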
@@ -433,6 +440,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::CTLZ, VT, Expand);
}
+ // Vector instructions introduced in P9
+ if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
+ setOperationAction(ISD::CTTZ, VT, Legal);
+ else
+ setOperationAction(ISD::CTTZ, VT, Expand);
+
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
@@ -489,7 +502,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
@@ -660,6 +672,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ if (Subtarget.hasDirectMove())
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
@@ -1061,6 +1077,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
+ case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
+ case PPCISD::STXSIX: return "PPCISD::STXSIX";
+ case PPCISD::VEXTS: return "PPCISD::VEXTS";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -1832,9 +1851,9 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
return;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
if (Align >= 4)
return;
@@ -2158,6 +2177,55 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
+// For 64-bit PowerPC, prefer the more compact relative encodings.
+// This trades 32 bits per jump table entry for one or two instructions
+// at the jump site.
+unsigned PPCTargetLowering::getJumpTableEncoding() const {
+ if (isJumpTableRelative())
+ return MachineJumpTableInfo::EK_LabelDifference32;
+
+ return TargetLowering::getJumpTableEncoding();
+}
+
+bool PPCTargetLowering::isJumpTableRelative() const {
+ if (Subtarget.isPPC64())
+ return true;
+ return TargetLowering::isJumpTableRelative();
+}
+
+SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget.isPPC64())
+ return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
+
+ switch (getTargetMachine().getCodeModel()) {
+ case CodeModel::Default:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
+ default:
+ return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()));
+ }
+}
+
+const MCExpr *
+PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,
+ MCContext &Ctx) const {
+ if (!Subtarget.isPPC64())
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ switch (getTargetMachine().getCodeModel()) {
+ case CodeModel::Default:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+ default:
+ return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
+ }
+}
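
A minimal sketch of the space/speed trade described above (illustrative only; the name resolveTarget is made up): with EK_LabelDifference32 each table entry is a 32-bit offset from a base symbol rather than a full 64-bit pointer, and the dispatch site pays one extra add to rebuild the target address:

    #include <cstdint>

    // Dispatch through a table of 32-bit label differences, mirroring the
    // EK_LabelDifference32 encoding: 4 bytes per case instead of 8 on PPC64.
    uintptr_t resolveTarget(uintptr_t Base, const int32_t *Table, unsigned Idx) {
      // One extra add at the jump site recovers the absolute address.
      return Base + static_cast<intptr_t>(Table[Idx]);
    }
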
+
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
@@ -2365,20 +2433,10 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
+ if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
+ return V;
+
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
- EVT VT = Op.getOperand(0).getValueType();
- SDValue Zext = Op.getOperand(0);
- if (VT.bitsLT(MVT::i32)) {
- VT = MVT::i32;
- Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
- }
- unsigned Log2b = Log2_32(VT.getSizeInBits());
- SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
- SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
- DAG.getConstant(Log2b, dl, MVT::i32));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
- }
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.
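
The ctlz/srl pair the comment refers to can be seen in scalar form below (a hedged sketch; lowerCmpEqZeroToCtlzSrl itself builds SDAG nodes). Counting leading zeros of a 32-bit value yields 32 only for zero, and 32 is the only possible result with bit 5 set, so a shift right by log2(32) leaves exactly the "is zero" bit:

    #include <cstdint>

    // (x == 0) without a branch or condition-register read.
    uint32_t isZero(uint32_t x) {
      // cntlzw defines ctlz(0) == 32; the GCC/Clang builtin does not, so guard.
      uint32_t Clz = x ? __builtin_clz(x) : 32;
      return Clz >> 5; // 0..31 >> 5 == 0; 32 >> 5 == 1
    }
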
@@ -2679,6 +2737,32 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
+bool
+llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+ int RegsLeft = NumArgRegs - RegNum;
+
+ // Skip if there are not enough registers left for long double type (4 gpr regs
+ // in soft float mode) and put long double argument on the stack.
+ if (RegNum != NumArgRegs && RegsLeft < 4) {
+ for (int i = 0; i < RegsLeft; i++) {
+ State.AllocateReg(ArgRegs[RegNum + i]);
+ }
+ }
+
+ return false;
+}
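
For illustration (hypothetical prototypes, assuming the 32-bit SVR4 soft-float convention this hook serves): r3-r10 carry integer arguments and a ppcf128 long double needs four consecutive GPRs, so when fewer than four remain the hook marks the stragglers allocated and the value travels wholly on the stack:

    // f1: a,b,c use r3..r5; x needs 4 GPRs and r6..r9 are free, so it fits.
    void f1(int a, int b, int c, long double x);

    // f2: a..e use r3..r7; only r8..r10 (3 regs) remain, so x goes entirely
    // on the stack and the 3 leftover registers are left unused.
    void f2(int a, int b, int c, int d, int e, long double x);
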
+
bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -2896,7 +2980,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// AltiVec Technology Programming Interface Manual
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -2956,7 +3040,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
break;
case MVT::v2f64:
case MVT::v2i64:
- RC = &PPC::VSHRCRegClass;
+ RC = &PPC::VRRCRegClass;
break;
case MVT::v4f64:
RC = &PPC::QFRCRegClass;
@@ -2980,8 +3064,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
assert(VA.isMemLoc());
unsigned ArgSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3042,10 +3126,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(
- MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset(), true));
+ MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
- FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are stored to the
@@ -3118,7 +3202,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
assert(!(CallConv == CallingConv::Fast && isVarArg) &&
@@ -3139,10 +3223,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
@@ -3231,7 +3311,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// pretend we have an 8-byte item at the current address for that
// purpose.
if (!ObjSize) {
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
continue;
@@ -3246,9 +3326,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI;
if (HasParameterArea ||
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
- FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
+ FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
else
- FI = MFI->CreateStackObject(ArgSize, Align, false);
+ FI = MFI.CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
// Handle aggregates smaller than 8 bytes.
@@ -3418,9 +3498,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
- MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
- MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
@@ -3469,7 +3547,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
if (needsLoad) {
if (ObjSize < ArgSize && !isLittleEndian)
CurArgOffset += ArgSize - ObjSize;
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
+ int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
}
@@ -3498,7 +3576,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ MFI.CreateFixedObject(PtrByteSize, Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -3530,7 +3608,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -3665,7 +3743,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
+ int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -3698,7 +3776,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -3735,7 +3813,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
ArgOffset += PtrByteSize;
break;
}
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
@@ -3819,9 +3897,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
// We need to load the argument to a virtual register if we determined above
// that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ int FI = MFI.CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
}
@@ -3852,8 +3930,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true));
+ MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -3903,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);
static bool
-resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
+resideInSameSection(const Function *Caller, SDValue Callee,
+ const TargetMachine &TM) {
// If !G, Callee can be an external symbol.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G) return false;
+ if (!G)
+ return false;
const GlobalValue *GV = G->getGlobal();
-
- if (GV->isDeclaration()) return false;
-
- switch(GV->getLinkage()) {
- default: llvm_unreachable("unknow linkage type");
- case GlobalValue::AvailableExternallyLinkage:
- case GlobalValue::ExternalWeakLinkage:
+ if (!GV->isStrongDefinitionForLinker())
return false;
- // Callee with weak linkage is allowed if it has hidden or protected
- // visibility
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
- case GlobalValue::WeakAnyLinkage:
- case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
- if (GV->hasDefaultVisibility())
+ // Any explicitly-specified sections and section prefixes must also match.
+ // Also, if we're using -ffunction-sections, then each function is always in
+ // a different section (the same is true for COMDAT functions).
+ if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
+ GV->getSection() != Caller->getSection())
+ return false;
+ if (const auto *F = dyn_cast<Function>(GV)) {
+ if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
-
- case GlobalValue::ExternalLinkage:
- case GlobalValue::InternalLinkage:
- case GlobalValue::PrivateLinkage:
- break;
}
- // With '-fPIC', calling default visiblity function need insert 'nop' after
- // function call, no matter that function resides in same module or not, so
- // we treat it as in different module.
- if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
+ // If the callee might be interposed, then we can't assume the ultimate call
+ // target will be in the same section. Even in cases where we can assume that
+ // interposition won't happen, in any case where the linker might insert a
+ // stub to allow for interposition, we must generate code as though
+ // interposition might occur. To understand why this matters, consider a
+ // situation where: a -> b -> c where the arrows indicate calls. b and c are
+ // in the same section, but a is in a different module (i.e. has a different
+ // TOC base pointer). If the linker allows for interposition between b and c,
+ // then it will generate a stub for the call edge between b and c which will
+ // save the TOC pointer into the designated stack slot allocated by b. If we
+ // return true here, and therefore allow a tail call between b and c, that
+ // stack slot won't exist and the b -> c stub will end up saving b's TOC base
+ // pointer into the stack slot allocated by a (where the a -> b stub saved
+ // a's TOC base pointer). If we're not considering a tail call, but rather,
+ // whether a nop is needed after the call instruction in b, because the linker
+ // will insert a stub, it might complain about a missing nop if we omit it
+ // (although many don't complain in this case).
+ if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
return false;
return true;
@@ -4037,8 +4121,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
return false;
// Caller contains any byval parameter is not supported.
- if (std::any_of(Ins.begin(), Ins.end(),
- [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
+ if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
return false;
// Callee contains any byval parameter is not supported, too.
@@ -4053,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
!isa<ExternalSymbolSDNode>(Callee))
return false;
- // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
- // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
- // module.
+ // Check if Callee resides in the same section, because for now, PPC64 SVR4
+ // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
+ // another section.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
- if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
+ if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@@ -4174,8 +4257,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
bool isPPC64 = Subtarget.isPPC64();
int SlotSize = isPPC64 ? 8 : 4;
int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
- int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- NewRetAddrLoc, true);
+ int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
@@ -4185,8 +4268,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
// slot as the FP is never overwritten.
if (Subtarget.isDarwinABI()) {
int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
- int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
- true);
+ int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
+ true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
MachinePointerInfo::getFixedStack(
@@ -4203,8 +4286,8 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
- uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
+ int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
@@ -4430,7 +4513,8 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
- ? MachineMemOperand::MOInvariant
+ ? (MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant)
: MachineMemOperand::MONone;
MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
@@ -4514,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
return CallOpc;
}
-static
-bool isLocalCall(const SDValue &Callee)
-{
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return G->getGlobal()->isStrongDefinitionForLinker();
- return false;
-}
-
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4610,7 +4686,7 @@ SDValue PPCTargetLowering::FinishCall(
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
- DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
+ DAG.getMachineFunction().getFrameInfo().setHasTailCall();
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
@@ -4623,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
+ MachineFunction &MF = DAG.getMachineFunction();
if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
!isPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
@@ -4646,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
// The address needs to go after the chain input but before the flag (or
// any other variadic arguments).
Ops.insert(std::next(Ops.begin()), AddTOC);
- } else if ((CallOpc == PPCISD::CALL) &&
- (!isLocalCall(Callee) ||
- DAG.getTarget().getRelocationModel() == Reloc::PIC_))
+ } else if (CallOpc == PPCISD::CALL &&
+ !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
+ }
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@@ -5026,10 +5103,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
@@ -5456,13 +5529,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
SDValue Load =
DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
-
- unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
- Arg.getSimpleValueType() == MVT::v2i64) ?
- VSRH[VR_idx] : VR[VR_idx];
- ++VR_idx;
-
- RegsToPass.push_back(std::make_pair(VReg, Load));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -5480,12 +5547,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
- Arg.getSimpleValueType() == MVT::v2i64) ?
- VSRH[VR_idx] : VR[VR_idx];
- ++VR_idx;
-
- RegsToPass.push_back(std::make_pair(VReg, Arg));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else {
if (CallConv == CallingConv::Fast)
ComputePtrOff();
@@ -6126,7 +6188,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
// Find out what the fix offset of the frame pointer save area.
int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
+ RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -6149,7 +6211,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// Find out what the fix offset of the frame pointer save area.
int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -6183,7 +6245,7 @@ SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- int FI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
+ int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
return DAG.getFrameIndex(FI, PtrVT);
}
@@ -6467,10 +6529,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
- RLI.Alignment,
- RLI.IsInvariant ? MachineMemOperand::MOInvariant
- : MachineMemOperand::MONone,
- RLI.AAInfo, RLI.Ranges);
+ RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
// We're trying to insert a regular store, S, and then a load, L. If the
@@ -6513,6 +6572,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
RLI.Chain = LD->getChain();
RLI.MPI = LD->getPointerInfo();
+ RLI.IsDereferenceable = LD->isDereferenceable();
RLI.IsInvariant = LD->isInvariant();
RLI.Alignment = LD->getAlignment();
RLI.AAInfo = LD->getAAInfo();
@@ -6545,11 +6605,17 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
/// \brief Analyze profitability of direct move
/// prefer float load to int load plus direct move
/// when there is no integer use of int load
-static bool directMoveIsProfitable(const SDValue &Op) {
+bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
SDNode *Origin = Op.getOperand(0).getNode();
if (Origin->getOpcode() != ISD::LOAD)
return true;
+ // If there is no LXSIBZX/LXSIHZX, as on Power8,
+ // prefer direct move if the memory size is 1 or 2 bytes.
+ MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
+ if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
+ return true;
+
for (SDNode::use_iterator UI = Origin->use_begin(),
UE = Origin->use_end();
UI != UE; ++UI) {
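
As a concrete shape of the new heuristic (a sketch; actual instruction selection depends on the subtarget): converting a sub-word load to floating point on a CPU without LXSIBZX/LXSIHZX, such as Power8, is cheaper via a GPR load plus a direct move than via a vector-register load sequence, which is why the hook above returns true for 1- and 2-byte memory inputs:

    #include <cstdint>

    // On Power8-class subtargets this favors lbz + mtvsrd-style direct moves.
    double widenByte(const uint8_t *P) { return static_cast<double>(*P); }
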
@@ -6705,11 +6771,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
- Bits =
- DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment,
- RLI.IsInvariant ? MachineMemOperand::MOInvariant
- : MachineMemOperand::MONone,
- RLI.AAInfo, RLI.Ranges);
+ Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
+ RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (Subtarget.hasLFIWAX() &&
canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
@@ -6736,10 +6799,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
(Subtarget.hasFPCVT() &&
SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
SINT.getOperand(0).getValueType() == MVT::i32) {
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -6782,7 +6845,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(MF.getDataLayout());
SDValue Ld;
@@ -6791,7 +6854,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
bool ReusingLoad;
if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
DAG))) {
- int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -6823,7 +6886,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ int FrameIdx = MFI.CreateStackObject(8, 8, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
@@ -6882,7 +6945,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
// Save FP register to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
MachinePointerInfo());
@@ -7068,6 +7131,57 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
+/// Do we have an efficient pattern in a .td file for this node?
+///
+/// \param V - pointer to the BuildVectorSDNode being matched
+/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
+///
+/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
+/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
+/// the opposite is true (expansion is beneficial) are:
+/// - The node builds a vector out of integers that are not 32 or 64-bits
+/// - The node builds a vector out of constants
+/// - The node is a "load-and-splat"
+/// In all other cases, we will choose to keep the BUILD_VECTOR.
+static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
+ bool HasDirectMove) {
+ EVT VecVT = V->getValueType(0);
+ bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
+ (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
+ if (!RightType)
+ return false;
+
+ bool IsSplat = true;
+ bool IsLoad = false;
+ SDValue Op0 = V->getOperand(0);
+
+ // This function is called in a block that confirms the node is not a constant
+ // splat. So a constant BUILD_VECTOR here means the vector is built out of
+ // different constants.
+ if (V->isConstant())
+ return false;
+ for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
+ if (V->getOperand(i).isUndef())
+ return false;
+ // We want to expand nodes that represent load-and-splat even if the
+ // loaded value is a floating point truncation or conversion to int.
+ if (V->getOperand(i).getOpcode() == ISD::LOAD ||
+ (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
+ V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+ (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
+ V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+ (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
+ V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
+ IsLoad = true;
+ // If the operands are different or the input is not a load and has more
+ // uses than just this BV node, then it isn't a splat.
+ if (V->getOperand(i) != Op0 ||
+ (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
+ IsSplat = false;
+ }
+ return !(IsSplat && IsLoad);
+}
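
For example (using the GCC/Clang vector extension purely for illustration), a vector gathered from four distinct 32-bit integers is the kind of BUILD_VECTOR this predicate keeps when direct moves are available, while a load-and-splat falls into the "expansion is beneficial" cases listed above:

    typedef int v4si __attribute__((vector_size(16)));

    // Kept as a BUILD_VECTOR: four distinct ints, cheap with direct moves.
    v4si gather(int a, int b, int c, int d) {
      v4si R = {a, b, c, d};
      return R;
    }

    // Load-and-splat: better expanded, per the doc comment above.
    v4si splatLoad(const int *p) {
      v4si R = {*p, *p, *p, *p};
      return R;
    }
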
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -7083,8 +7197,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// We first build an i32 vector, load it into a QPX register,
// then convert it to a floating-point vector and compare it
// to a zero vector to get the boolean result.
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -7189,8 +7303,15 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
- SplatBitSize > 32)
+ SplatBitSize > 32) {
+ // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
+ // lowered to VSX instructions under certain conditions.
+ // Without VSX, there is no pattern more efficient than expanding the node.
+ if (Subtarget.hasVSX() &&
+ haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
+ return Op;
return SDValue();
+ }
unsigned SplatBits = APSplatBits.getZExtValue();
unsigned SplatUndef = APSplatUndef.getZExtValue();
@@ -7208,6 +7329,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}
+ // We have XXSPLTIB for constant splats one byte wide
+ if (Subtarget.hasP9Vector() && SplatSize == 1) {
+ // This is a splat of 1-byte elements with some elements potentially undef.
+ // Rather than trying to match undef in the SDAG patterns, ensure that all
+ // elements are the same constant.
+ if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
+ SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
+ dl, MVT::i32));
+ SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
+ if (Op.getValueType() != MVT::v16i8)
+ return DAG.getBitcast(Op.getValueType(), NewBV);
+ return NewBV;
+ }
+ return Op;
+ }
+
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
@@ -7451,6 +7588,18 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+
+ // If the source for the shuffle is a scalar_to_vector that came from a
+ // 32-bit load, it will have used LXVWSX so we don't need to splat again.
+ if (Subtarget.hasP9Vector() &&
+ ((isLittleEndian && SplatIdx == 3) ||
+ (!isLittleEndian && SplatIdx == 0))) {
+ SDValue Src = V1.getOperand(0);
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Src.getOperand(0).getOpcode() == ISD::LOAD &&
+ Src.getOperand(0).hasOneUse())
+ return V1;
+ }
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));
@@ -7662,6 +7811,27 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpneb_p:
+ case Intrinsic::ppc_altivec_vcmpneh_p:
+ case Intrinsic::ppc_altivec_vcmpnew_p:
+ case Intrinsic::ppc_altivec_vcmpnezb_p:
+ case Intrinsic::ppc_altivec_vcmpnezh_p:
+ case Intrinsic::ppc_altivec_vcmpnezw_p:
+ if (Subtarget.hasP9Altivec()) {
+ switch(IntrinsicID) {
+ default: llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
+ case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
+ case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
+ case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
+ case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
+ case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
+ }
+ isDot = 1;
+ } else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
@@ -7723,6 +7893,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpneb:
+ case Intrinsic::ppc_altivec_vcmpneh:
+ case Intrinsic::ppc_altivec_vcmpnew:
+ case Intrinsic::ppc_altivec_vcmpnezb:
+ case Intrinsic::ppc_altivec_vcmpnezh:
+ case Intrinsic::ppc_altivec_vcmpnezw:
+ if (Subtarget.hasP9Altivec()) {
+ switch (IntrinsicID) {
+ default: llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
+ case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
+ case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
+ case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
+ case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
+ case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
+ }
+ isDot = 0;
+ } else
+ return false;
+ break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
@@ -7857,8 +8047,8 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -7909,8 +8099,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
Value);
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -8109,8 +8299,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
Value);
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -8545,6 +8735,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -8574,7 +8765,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
@@ -8619,8 +8811,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
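
Why the XORI becomes big-endian-only can be checked with ordinary arithmetic (byte case shown; the halfword case uses xori 16 over even offsets). The RLWINM computes the byte's bit offset within the word as 8*(addr & 3), which is already the correct shift on little-endian; big-endian needs the mirrored lane, 8*(3 - (addr & 3)), which is exactly what xor with 24 produces over the values 0, 8, 16, 24:

    #include <cstdint>

    // Shift amount used to move a byte lane into/out of the lwarx/stwcx. word.
    unsigned laneShift(uintptr_t Addr, bool IsLittleEndian) {
      unsigned Shift1 = (Addr & 3u) << 3;             // the RLWINM result above
      return IsLittleEndian ? Shift1 : (Shift1 ^ 24); // XORI only needed on BE
    }
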
@@ -9325,6 +9518,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI.getOperand(0).getReg();
@@ -9351,7 +9545,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
@@ -9406,8 +9601,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
@@ -9532,23 +9728,21 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-static std::string getRecipOp(const char *Base, EVT VT) {
- std::string RecipOp(Base);
+static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- RecipOp += "d";
- else
- RecipOp += "f";
-
- if (VT.isVector())
- RecipOp = "vec-" + RecipOp;
-
- return RecipOp;
+ RefinementSteps++;
+ return RefinementSteps;
}
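
The constants follow directly from squaring the error each Newton-Raphson step: from 2^-5 the accuracy goes 2^-10, 2^-20, 2^-40, so three steps clear float's 24-bit significand; from 2^-14 one step reaches 2^-28; doubles need one more squaring to pass 2^-53. A tiny check of that arithmetic:

    // Doubling "correct bits" per iteration until the target precision is met.
    static int stepsNeeded(int StartBits, int TargetBits) {
      int Steps = 0;
      for (int Bits = StartBits; Bits < TargetBits; Bits *= 2)
        ++Steps;
      return Steps;
    }
    // stepsNeeded(5, 24) == 3, stepsNeeded(14, 24) == 1 (float);
    // stepsNeeded(5, 53) == 4, stepsNeeded(14, 53) == 2 (double, one extra step).
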
-SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const {
+SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
@@ -9556,21 +9750,18 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- std::string RecipOp = getRecipOp("sqrt", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
- return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+ return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
-SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
@@ -9578,13 +9769,9 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- std::string RecipOp = getRecipOp("div", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
@@ -9635,13 +9822,13 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
- const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
+ int FS = MFI.getObjectSize(FI);
+ int BFS = MFI.getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
}
SDValue Base1 = Loc, Base2 = BaseLoc;
@@ -9699,9 +9886,11 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
VT = MVT::v4i32;
break;
case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
VT = MVT::v2f64;
break;
case Intrinsic::ppc_altivec_lvebx:
@@ -9748,6 +9937,12 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ VT = MVT::v2f64;
+ break;
case Intrinsic::ppc_altivec_stvebx:
VT = MVT::i8;
break;
@@ -9833,6 +10028,87 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
+
+/// This function is called when we have proved that a SETCC node can be replaced
+/// by subtraction (and other supporting instructions) so that the result of
+/// comparison is kept in a GPR instead of CR. This function is purely for
+/// codegen purposes and has some flags to guide the codegen process.
+static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
+ bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+
+ assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+ // Zero extend the operands to the largest legal integer. Originally, they
+ // must be of a strictly smaller size.
+ auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
+ DAG.getConstant(Size, DL, MVT::i32));
+ auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
+ DAG.getConstant(Size, DL, MVT::i32));
+
+ // Swap if needed. Depends on the condition code.
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Subtract extended integers.
+ auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
+
+ // Move the sign bit to the least significant position and zero out the rest.
+ // Now the least significant bit carries the result of original comparison.
+ auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
+ DAG.getConstant(Size - 1, DL, MVT::i32));
+ auto Final = Shifted;
+
+ // Complement the result if needed. Based on the condition code.
+ if (Complement)
+ Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
+ DAG.getConstant(1, DL, MVT::i64));
+
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
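
In scalar terms, the node sequence above computes an unsigned compare as a borrow extraction. A sketch for SETULT with 32-bit operands and Size == 64, as the caller below passes on 64-bit subtargets (SETULE/SETUGT/SETUGE follow via the Swap and Complement flags):

    #include <cstdint>

    // a < b (unsigned): zero-extend, subtract, read the borrow from bit 63.
    uint64_t ultViaSub(uint32_t a, uint32_t b) {
      return (static_cast<uint64_t>(a) - static_cast<uint64_t>(b)) >> 63;
    }
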
+
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+
+ assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ // Size of integers being compared has a critical role in the following
+ // analysis, so we prefer to do this when all types are legal.
+ if (!DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ // If all users of SETCC extend its value to a legal integer type
+ // then we replace SETCC with a subtraction
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ }
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ auto OpSize = N->getOperand(0).getValueSizeInBits();
+
+ unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+
+ if (OpSize < Size) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ return generateEquivalentSub(N, Size, false, false, DL, DAG);
+ case ISD::SETULE:
+ return generateEquivalentSub(N, Size, true, true, DL, DAG);
+ case ISD::SETUGT:
+ return generateEquivalentSub(N, Size, false, true, DL, DAG);
+ case ISD::SETUGE:
+ return generateEquivalentSub(N, Size, true, false, DL, DAG);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -9874,7 +10150,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
APInt::getHighBitsSet(OpBits, OpBits-1)) ||
!DAG.MaskedValueIsZero(N->getOperand(1),
APInt::getHighBitsSet(OpBits, OpBits-1)))
- return SDValue();
+ return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+ : SDValue());
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
@@ -10398,6 +10675,173 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+/// \brief Reduces the number of fp-to-int conversions when building a vector.
+///
+/// If this vector is built out of floating to integer conversions,
+/// transform it to a vector built out of floating point values followed by a
+/// single floating to integer conversion of the vector.
+/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
+/// becomes (fptosi (build_vector ($A, $B, ...)))
+SDValue PPCTargetLowering::
+combineElementTruncationToVectorTruncation(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ "Should be called with a BUILD_VECTOR node");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ SDValue FirstInput = N->getOperand(0);
+ assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
+ "The input operand must be an fp-to-int conversion.");
+
+ // This combine happens after legalization so the fp_to_[su]i nodes are
+ // already converted to PPCSISD nodes.
+ unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
+ if (FirstConversion == PPCISD::FCTIDZ ||
+ FirstConversion == PPCISD::FCTIDUZ ||
+ FirstConversion == PPCISD::FCTIWZ ||
+ FirstConversion == PPCISD::FCTIWUZ) {
+ bool IsSplat = true;
+ bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
+ FirstConversion == PPCISD::FCTIWUZ;
+ EVT SrcVT = FirstInput.getOperand(0).getValueType();
+ SmallVector<SDValue, 4> Ops;
+ EVT TargetVT = N->getValueType(0);
+ for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+ if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+ return SDValue();
+ unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+ if (NextConversion != FirstConversion)
+ return SDValue();
+ if (N->getOperand(i) != FirstInput)
+ IsSplat = false;
+ }
+
+ // If this is a splat, we leave it as-is since there will be only a single
+ // fp-to-int conversion followed by a splat of the integer. This is better
+ // for 32-bit and smaller ints and neutral for 64-bit ints.
+ if (IsSplat)
+ return SDValue();
+
+ // Now that we know we have the right type of node, get its operands
+ for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+ SDValue In = N->getOperand(i).getOperand(0);
+ // For 32-bit values, we need to add an FP_ROUND node.
+ if (Is32Bit) {
+ if (In.isUndef())
+ Ops.push_back(DAG.getUNDEF(SrcVT));
+ else {
+ SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, In.getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+ Ops.push_back(Trunc);
+ }
+ } else
+ Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
+ }
+
+ unsigned Opcode;
+ if (FirstConversion == PPCISD::FCTIDZ ||
+ FirstConversion == PPCISD::FCTIWZ)
+ Opcode = ISD::FP_TO_SINT;
+ else
+ Opcode = ISD::FP_TO_UINT;
+
+ EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
+ SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
+ return DAG.getNode(Opcode, dl, TargetVT, BV);
+ }
+ return SDValue();
+}
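
In source terms (vector extension used only for illustration), this is the shape the combine rewrites: four scalar float-to-int conversions feeding one vector become one vector build followed by a single vector conversion, i.e. (build_vector (fptosi $A), ...) becomes (fptosi (build_vector $A, ...)):

    typedef int v4si __attribute__((vector_size(16)));

    v4si truncAll(float a, float b, float c, float d) {
      // Before the combine: four scalar fp-to-int conversions plus moves.
      // After: one v4f32 build_vector and a single vector convert.
      v4si R = {(int)a, (int)b, (int)c, (int)d};
      return R;
    }
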
+
+/// \brief Reduce the number of loads when building a vector.
+///
+/// Building a vector out of multiple loads can be converted to a load
+/// of the vector type if the loads are consecutive. If the loads are
+/// consecutive but in descending order, a shuffle is added at the end
+/// to reorder the vector.
+static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ "Should be called with a BUILD_VECTOR node");
+
+ SDLoc dl(N);
+ bool InputsAreConsecutiveLoads = true;
+ bool InputsAreReverseConsecutive = true;
+ unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+ SDValue FirstInput = N->getOperand(0);
+ bool IsRoundOfExtLoad = false;
+
+ if (FirstInput.getOpcode() == ISD::FP_ROUND &&
+ FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
+ IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
+ }
+ // Not a build vector of (possibly fp_rounded) loads.
+ if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
+ // If any inputs are fp_round(extload), they all must be.
+ if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
+ return SDValue();
+
+ SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
+ N->getOperand(i);
+ if (NextInput.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ SDValue PreviousInput =
+ IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
+
+ // If any inputs are fp_round(extload), they all must be.
+ if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
+ return SDValue();
+
+ if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
+ InputsAreConsecutiveLoads = false;
+ if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
+ InputsAreReverseConsecutive = false;
+
+ // Exit early if the loads are neither consecutive nor reverse consecutive.
+ if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
+ return SDValue();
+ }
+
+ assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
+ "The loads cannot be both consecutive and reverse consecutive.");
+
+ SDValue FirstLoadOp =
+ IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
+ SDValue LastLoadOp =
+ IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
+ N->getOperand(N->getNumOperands()-1);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
+ LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
+ if (InputsAreConsecutiveLoads) {
+ assert(LD1 && "Input needs to be a LoadSDNode.");
+ return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ LD1->getAlignment());
+ }
+ if (InputsAreReverseConsecutive) {
+ assert(LDL && "Input needs to be a LoadSDNode.");
+ SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
+ LDL->getBasePtr(), LDL->getPointerInfo(),
+ LDL->getAlignment());
+ SmallVector<int, 16> Ops;
+ for (int i = N->getNumOperands() - 1; i >= 0; i--)
+ Ops.push_back(i);
+
+ return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
+ DAG.getUNDEF(N->getValueType(0)), Ops);
+ }
+ return SDValue();
+}
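+
+// Hypothetical example of the combine above (editorial sketch): a v4i32
+// build_vector of four i32 loads from a, a+4, a+8, a+12 collapses into a
+// single v4i32 load from a; if the operands arrive in descending order
+// (a+12, a+8, a+4, a), the same wide load is emitted, followed by a
+// vector_shuffle with mask <3,2,1,0> to restore the element order.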
+
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -10405,21 +10849,41 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
+
+ if (!Subtarget.hasVSX())
+ return SDValue();
+
+ // The target-independent DAG combiner will leave a build_vector of
+ // float-to-int conversions intact. We can generate much better code for
+ // a float-to-int conversion of a vector of floats.
+ SDValue FirstInput = N->getOperand(0);
+ if (FirstInput.getOpcode() == PPCISD::MFVSR) {
+ SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
+ if (Reduced)
+ return Reduced;
+ }
+
+ // If we're building a vector out of consecutive loads, just load that
+ // vector type.
+ SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
+ if (Reduced)
+ return Reduced;
+
+ if (N->getValueType(0) != MVT::v2f64)
return SDValue();
// Looking for:
   // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
- if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP &&
- N->getOperand(0).getOpcode() != ISD::UINT_TO_FP)
+ if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
+ FirstInput.getOpcode() != ISD::UINT_TO_FP)
return SDValue();
if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
return SDValue();
- if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
+ if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
return SDValue();
- SDValue Ext1 = N->getOperand(0).getOperand(0);
+ SDValue Ext1 = FirstInput.getOperand(0);
SDValue Ext2 = N->getOperand(1).getOperand(0);
if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
@@ -10464,6 +10928,34 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
SDLoc dl(N);
SDValue Op(N, 0);
+ SDValue FirstOperand(Op.getOperand(0));
+ bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
+ (FirstOperand.getValueType() == MVT::i8 ||
+ FirstOperand.getValueType() == MVT::i16);
+ if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ bool DstDouble = Op.getValueType() == MVT::f64;
+ unsigned ConvOp = Signed ?
+ (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
+ (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
+ SDValue WidthConst =
+ DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
+ dl, false);
+ LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
+ SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
+ SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i8, LDN->getMemOperand());
+
+ // For signed conversion, we need to sign-extend the value in the VSR
+ if (Signed) {
+ SDValue ExtOps[] = { Ld, WidthConst };
+ SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
+ return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
+ }
+ return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
+ }
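+
+ // Editorial sketch (illustrative only) of the DAG produced above for a
+ // signed i16 source:
+ //   (f64 (sint_to_fp (i16 (load p))))
+ // becomes
+ //   (fcfid (vexts (lxsizx p, 2), 2))
+ // keeping the value in a VSR throughout instead of converting via a GPR.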
+
   // Don't handle ppc_fp128 or i1 conversions here.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
@@ -10676,10 +11168,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
+ EVT Op1VT = N->getOperand(1).getValueType();
+ bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
+ (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
+
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
- N->getOperand(1).getValueType() == MVT::i32 &&
+ ValidTypeForStoreFltAsInt &&
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
SDValue Val = N->getOperand(1).getOperand(0);
if (Val.getValueType() == MVT::f32) {
@@ -10689,15 +11185,31 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- SDValue Ops[] = {
- N->getOperand(0), Val, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
- };
+ if (Op1VT == MVT::i32) {
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ } else {
+ unsigned WidthInBytes =
+ N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
+ SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
+
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2), WidthConst,
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
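+ // (Editorial sketch) e.g. storing an i16 FP_TO_SINT result now lowers to
+ // FCTIWZ feeding STXSIX with a width constant of 2, rather than moving
+ // the converted value to a GPR for an ordinary truncating store.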
- Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops,
- cast<StoreSDNode>(N)->getMemoryVT(),
- cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -10726,10 +11238,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
   // For little endian, VSX stores require generating xxswapd/stxvd2x.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
EVT VT = N->getOperand(1).getValueType();
if (VT.isSimple()) {
MVT StoreVT = VT.getSimpleVT();
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ if (Subtarget.needsSwapsForVSXMemOps() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
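+ // (Editorial note) lxvd2x/stxvd2x access the two doublewords in
+ // big-endian element order, hence the xxswapd on little-endian targets;
+ // ISA 3.0 adds endian-neutral lxvx/stxvx, which is the distinction
+ // needsSwapsForVSXMemOps() captures.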
@@ -10741,9 +11254,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = LD->getValueType(0);
// For little endian, VSX loads require generating lxvd2x/xxswapd.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ if (Subtarget.needsSwapsForVSXMemOps() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
@@ -11014,11 +11528,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
- if (DAG.MaskedValueIsZero(
- Add->getOperand(1),
- APInt::getAllOnesValue(Bits /* alignment */)
- .zext(
- Add.getValueType().getScalarType().getSizeInBits()))) {
+ if (DAG.MaskedValueIsZero(Add->getOperand(1),
+ APInt::getAllOnesValue(Bits /* alignment */)
+ .zext(Add.getScalarValueSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode::use_iterator UI = BasePtr->use_begin(),
UE = BasePtr->use_end();
@@ -11060,7 +11572,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::INTRINSIC_W_CHAIN: {
// For little endian, VSX loads require generating lxvd2x/xxswapd.
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+ if (Subtarget.needsSwapsForVSXMemOps()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -11073,7 +11586,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::INTRINSIC_VOID: {
// For little endian, VSX stores require generating xxswapd/stxvd2x.
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
+ if (Subtarget.needsSwapsForVSXMemOps()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -11392,7 +11906,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
uint64_t LoopSize = 0;
for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
- LoopSize += TII->GetInstSizeInBytes(*J);
+ LoopSize += TII->getInstSizeInBytes(*J);
if (LoopSize > 32)
break;
}
@@ -11688,8 +12202,8 @@ bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -11726,8 +12240,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT PtrVT = getPointerTy(MF.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
@@ -12237,3 +12751,20 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget.isTargetLinux())
return TargetLowering::insertSSPDeclarations(M);
}
+
+bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!VT.isSimple() || !Subtarget.hasVSX())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ // For FP types that are currently not supported by the PPC backend, return
+ // false. Examples: f16, f80.
+ return false;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::ppcf128:
+ return Imm.isPosZero();
+ }
+}
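+
+// Editorial note (sketch): accepting only +0.0 here lets instruction
+// selection materialize the immediate with a vector XOR (e.g. xxlxor)
+// rather than loading it from the constant pool.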