summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp1056
1 files changed, 714 insertions, 342 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3fcafdc..8da5f05 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -36,21 +37,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -64,14 +50,15 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
+ if (TM.getSubtargetImpl()->isSVR4ABI())
+ return new PPC64LinuxTargetObjectFile();
+
return new TargetLoweringObjectFileELF();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
- PPCRegInfo = TM.getRegisterInfo();
- PPCII = TM.getInstrInfo();
setPow2DivIsCheap();
@@ -162,28 +149,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
- // frin does not implement "ties to even." Thus, this is safe only in
- // fast-math mode.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
- // These need to set FE_INEXACT, and use a custom inserter.
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- }
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
@@ -241,11 +224,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
-
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
@@ -298,8 +276,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ if (Subtarget->isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
// Use the default implementation.
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
@@ -309,6 +292,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ // To handle counter-based loop conditions.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+
// Comparisons that require checking two conditions.
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -407,6 +393,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -501,6 +488,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
@@ -529,9 +519,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
@@ -564,7 +556,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
- setSchedulingPreference(Sched::Hybrid);
+ if (Subtarget->enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
@@ -583,24 +578,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
- const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ if (PPCSubTarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- if (VTy->getBitWidth() >= 128)
- return 16;
-
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- if (PPCSubTarget.isPPC64())
- return 8;
-
- return 4;
+ unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+ if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+ getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ return Align;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -634,7 +652,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
- case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -642,6 +660,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LARX: return "PPCISD::LARX";
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::BDNZ: return "PPCISD::BDNZ";
+ case PPCISD::BDZ: return "PPCISD::BDZ";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
@@ -662,10 +682,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
+ case PPCISD::SC: return "PPCISD::SC";
}
}
-EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
@@ -1036,24 +1057,68 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
return false;
}
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+ // we find such a test case. The reason why this is so test-case driven is
+ // because this entire 'fixup' is only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+ // store i64* a, i64 b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg.
+/// represented as reg+reg. If Aligned is true, only accept displacements
+/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG,
+ bool Aligned) const {
// FIXME dl should come from parent load or store, not from address
- DebugLoc dl = N.getDebugLoc();
+ SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG))
return false;
if (N.getOpcode() == ISD::ADD) {
short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm)) {
- Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
+ Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
@@ -1072,7 +1137,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
} else if (N.getOpcode() == ISD::OR) {
short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm)) {
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -1083,7 +1149,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
Base = N.getOperand(0);
- Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
}
@@ -1093,7 +1159,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
short Imm;
- if (isIntS16Immediate(CN, Imm)) {
+ if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -1101,8 +1167,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
// Handle 32-bit sext immediates with LIS + addr mode.
- if (CN->getValueType(0) == MVT::i32 ||
- (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ if ((CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
+ (!Aligned || (CN->getZExtValue() & 3) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -1116,9 +1183,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
Base = N;
return true; // [r+0]
}
@@ -1150,92 +1218,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
-/// SelectAddressRegImmShift - Returns true if the address N can be
-/// represented by a base register plus a signed 14-bit displacement
-/// [r+imm*4]. Suitable for use by STD and friends.
-bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
- SDValue &Base,
- SelectionDAG &DAG) const {
- // FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = N.getDebugLoc();
- // If this can be more profitably realized as r+r, fail.
- if (SelectAddressRegReg(N, Disp, Base, DAG))
- return false;
-
- if (N.getOpcode() == ISD::ADD) {
- short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
- Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- } else {
- Base = N.getOperand(0);
- }
- return true; // [r+i]
- } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
- // Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
- && "Cannot handle constant offsets yet!");
- Disp = N.getOperand(1).getOperand(0); // The global address.
- assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
- Disp.getOpcode() == ISD::TargetConstantPool ||
- Disp.getOpcode() == ISD::TargetJumpTable);
- Base = N.getOperand(0);
- return true; // [&g+r]
- }
- } else if (N.getOpcode() == ISD::OR) {
- short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- // If this is an or of disjoint bitfields, we can codegen this as an add
- // (for better address arithmetic) if the LHS and RHS of the OR are
- // provably disjoint.
- APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
- if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
- // If all of the bits are known zero on the LHS or RHS, the add won't
- // carry.
- Base = N.getOperand(0);
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
- return true;
- }
- }
- } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
- // Loading from a constant address. Verify low two bits are clear.
- if ((CN->getZExtValue() & 3) == 0) {
- // If this address fits entirely in a 14-bit sext immediate field, codegen
- // this as "d, 0"
- short Imm;
- if (isIntS16Immediate(CN, Imm)) {
- Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
- CN->getValueType(0));
- return true;
- }
-
- // Fold the low-part of 32-bit absolute addresses into addr mode.
- if (CN->getValueType(0) == MVT::i32 ||
- (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
- int Addr = (int)CN->getZExtValue();
-
- // Otherwise, break this down into an LIS + disp.
- Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
- Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
- unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
- Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
- return true;
- }
- }
- }
-
- Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
- Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
- Base = N;
- return true; // [r+0]
-}
-
-
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -1288,18 +1270,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
return true;
}
- // LDU/STU use reg+imm*4, others use reg+imm.
+ // LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
- // reg + imm
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
- // reg + imm * 4.
- if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
return false;
}
@@ -1324,8 +1304,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
unsigned &LoOpFlags, const GlobalValue *GV = 0) {
- HiOpFlags = PPCII::MO_HA16;
- LoOpFlags = PPCII::MO_LO16;
+ HiOpFlags = PPCII::MO_HA;
+ LoOpFlags = PPCII::MO_LO;
// Don't use the pic base if not in PIC relocation model. Or if we are on a
// non-darwin platform. We don't support PIC on other platforms yet.
@@ -1355,7 +1335,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
SelectionDAG &DAG) {
EVT PtrVT = HiPart.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- DebugLoc DL = HiPart.getDebugLoc();
+ SDLoc DL(HiPart);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1380,7 +1360,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
@@ -1401,7 +1381,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
@@ -1428,8 +1408,12 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
@@ -1438,9 +1422,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
+ PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ PPCII::MO_TPREL_LO);
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
@@ -1452,12 +1436,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, TPOffsetHi);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
@@ -1515,7 +1501,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- DebugLoc DL = GSDN->getDebugLoc();
+ SDLoc DL(GSDN);
const GlobalValue *GV = GSDN->getGlobal();
// 64-bit SVR4 ABI code is always position-independent.
@@ -1546,7 +1532,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// If we're comparing for equality to zero, expose the fact that this is
// implented as a ctlz/srl pair on ppc, so that the dag combiner can
@@ -1595,7 +1581,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- DebugLoc dl = Node->getDebugLoc();
+ SDLoc dl(Node);
assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
@@ -1695,6 +1681,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, MVT::i32), 8, false, true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
return Op.getOperand(0);
@@ -1706,7 +1704,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
@@ -1748,7 +1746,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -1842,18 +1840,24 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1876,11 +1880,11 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1931,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
if (PPCSubTarget.isSVR4ABI()) {
@@ -1953,7 +1957,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// 32-bit SVR4 ABI Stack Frame Layout:
@@ -2170,14 +2174,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
SelectionDAG &DAG, SDValue ArgVal,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (Flags.isSExt())
ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
else if (Flags.isZExt())
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
-
+
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}
@@ -2213,7 +2217,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
@@ -2304,6 +2308,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(FIN);
continue;
}
+
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ CurArgOffset = ArgOffset;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@@ -2502,7 +2513,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
@@ -2600,17 +2611,17 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
- // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
- // When passing anonymous aggregates, this is currently not true.
- // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -3002,9 +3013,9 @@ struct TailCallArgumentInfo {
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue Chain,
- const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
- SmallVector<SDValue, 8> &MemOpChains,
- DebugLoc dl) {
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ SDLoc dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
SDValue FIN = TailCallArgs[i].FrameIdxOp;
@@ -3026,7 +3037,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int SPDiff,
bool isPPC64,
bool isDarwinABI,
- DebugLoc dl) {
+ SDLoc dl) {
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
@@ -3061,7 +3072,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
- SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
@@ -3083,7 +3094,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDValue &LROpOut,
SDValue &FPOpOut,
bool isDarwinABI,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -3113,7 +3124,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
false, false, MachinePointerInfo(0),
@@ -3126,9 +3137,9 @@ static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
- bool isVector, SmallVector<SDValue, 8> &MemOpChains,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
- DebugLoc dl) {
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
+ SDLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
if (isVector) {
@@ -3149,9 +3160,9 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
- DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
SDValue LROp, SDValue FPOp, bool isDarwinABI,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
MachineFunction &MF = DAG.getMachineFunction();
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
@@ -3171,15 +3182,15 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
// Emit callseq_end just before tailcall node.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
- SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &PPCSubTarget) {
bool isPPC64 = PPCSubTarget.isPPC64();
@@ -3363,7 +3374,7 @@ SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -3406,7 +3417,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
}
SDValue
-PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
bool isTailCall, bool isVarArg,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -3476,7 +3487,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
@@ -3493,7 +3506,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
- InFlag);
+ InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -3505,10 +3518,10 @@ SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
@@ -3542,7 +3555,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
@@ -3628,7 +3641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be moved somewhere else
@@ -3679,7 +3693,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
@@ -3755,13 +3770,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue CallSeqStart,
ISD::ArgFlagsTy Flags,
SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
@@ -3774,7 +3790,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
@@ -3815,7 +3831,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -3889,6 +3906,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3940,7 +3966,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// register.
// FIXME: The memcpy seems to produce pretty awful code for
// small aggregates, particularly for packed ones.
- // FIXME: It would be preferable to use the slot in the
+ // FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
@@ -3980,7 +4006,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -4003,7 +4029,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4145,7 +4171,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
@@ -4186,7 +4212,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -4310,7 +4337,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -4502,7 +4529,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+ SDLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -4551,7 +4578,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
// When we pop the dynamic allocation we need to restore the SP link.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Get the corect type for pointers.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4636,7 +4663,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Get the corect type for pointers.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4653,7 +4680,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
@@ -4661,7 +4688,7 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
@@ -4687,7 +4714,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT CmpVT = Op.getOperand(0).getValueType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
@@ -4768,14 +4795,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
@@ -4827,7 +4854,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
@@ -4961,7 +4988,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
/*
The rounding mode is in bits 30:31 of FPSR, and has the following
settings:
@@ -5027,7 +5054,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SHL!");
@@ -5055,7 +5082,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
@@ -5083,7 +5110,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
@@ -5118,7 +5145,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
static const EVT VTys[] = { // canonical VT to use for each size.
@@ -5142,10 +5169,20 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
+/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
+ SelectionDAG &DAG, SDLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op);
+}
+
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, DebugLoc dl,
+ SelectionDAG &DAG, SDLoc dl,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
@@ -5156,7 +5193,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDValue Op2, SelectionDAG &DAG,
- DebugLoc dl, EVT DestVT = MVT::Other) {
+ SDLoc dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
@@ -5166,7 +5203,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
- EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT, SelectionDAG &DAG, SDLoc dl) {
// Force LHS/RHS to be the right type.
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
@@ -5185,7 +5222,7 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
@@ -5341,7 +5378,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
@@ -5420,7 +5457,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -5587,7 +5624,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
int CompareOpc;
bool isDot;
if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
@@ -5612,7 +5649,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
@@ -5651,7 +5688,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
@@ -5668,7 +5705,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
@@ -5745,6 +5782,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG:
return LowerVAARG(Op, DAG, PPCSubTarget);
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, PPCSubTarget);
+
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
@@ -5755,7 +5795,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
- Op.getDebugLoc());
+ SDLoc(Op));
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -5772,6 +5812,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ // For counter-based loop handling.
+ case ISD::INTRINSIC_W_CHAIN: return SDValue();
+
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -5782,10 +5825,26 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
const TargetMachine &TM = getTargetMachine();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::INTRINSIC_W_CHAIN: {
+ if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
+ Intrinsic::ppc_is_decremented_ctr_nonzero)
+ break;
+
+ assert(N->getValueType(0) == MVT::i1 &&
+ "Unexpected result type for CTR decrement intrinsic");
+ EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
+ SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
+ N->getOperand(1));
+
+ Results.push_back(NewInt);
+ Results.push_back(NewInt.getValue(1));
+ break;
+ }
case ISD::VAARG: {
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|| TM.getSubtarget<PPCSubtarget>().isPPC64())
@@ -5821,6 +5880,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
@@ -6092,6 +6154,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -6101,15 +6164,32 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
- .addImm(TOCOffset / 4)
+ .addImm(TOCOffset)
.addReg(BufReg);
-
MIB.setMemRefs(MMOBegin, MMOEnd);
}
+ // Naked functions never have a base pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+ const PPCRegisterInfo *TRI =
+ static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -6129,7 +6209,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
if (PPCSubTarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
- .addImm(LabelOffset / 4)
+ .addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
@@ -6176,12 +6256,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
unsigned BufReg = MI->getOperand(0).getReg();
@@ -6202,7 +6284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Reload IP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
- .addImm(LabelOffset / 4)
+ .addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
@@ -6214,7 +6296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Reload SP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
- .addImm(SPOffset / 4)
+ .addImm(SPOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
@@ -6223,13 +6305,22 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
}
MIB.setMemRefs(MMOBegin, MMOEnd);
- // FIXME: When we also support base pointers, that register must also be
- // restored here.
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
- .addImm(TOCOffset / 4)
+ .addImm(TOCOffset)
.addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -6272,8 +6363,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
- MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+ Cond, MI->getOperand(2).getReg(),
+ MI->getOperand(3).getReg());
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6633,51 +6726,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
- } else if (MI->getOpcode() == PPC::FRINDrint ||
- MI->getOpcode() == PPC::FRINSrint) {
- bool isf32 = MI->getOpcode() == PPC::FRINSrint;
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
-
- // Perform the rounding.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
- .addReg(Src);
-
- // Compare the results.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
- .addReg(Dest).addReg(Src);
-
- // If the results were not equal, then set the FPSCR XX bit.
- MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, midMBB);
- F->insert(It, exitMBB);
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
- BB->addSuccessor(midMBB);
- BB->addSuccessor(exitMBB);
-
- BB = midMBB;
-
- // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
- // the FI bit here because that will not automatically set XX also,
- // and XX is what libm interprets as the FE_INEXACT flag.
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
- BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
- BB->addSuccessor(exitMBB);
-
- BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6717,7 +6765,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
++Iterations;
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPOne =
DAG.getConstantFP(1.0, VT.getScalarType());
@@ -6779,7 +6827,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
++Iterations;
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPThreeHalves =
DAG.getConstantFP(1.5, VT.getScalarType());
@@ -6823,11 +6871,120 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
return SDValue();
}
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ EVT VT = LS->getMemoryVT();
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LS->getBasePtr();
+ SDValue BaseLoc = Base->getBasePtr();
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+ SDValue Chain = LD->getChain();
+ EVT VT = LD->getMemoryVT();
+
+ SmallSet<SDNode *, 16> LoadRoots;
+ SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+ SmallSet<SDNode *, 16> Visited;
+
+ // First, search up the chain, branching to follow all token-factor operands.
+ // If we find a consecutive load, then we're done, otherwise, record all
+ // nodes just above the top-level loads and token factors.
+ while (!Queue.empty()) {
+ SDNode *ChainNext = Queue.pop_back_val();
+ if (!Visited.insert(ChainNext))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ if (!Visited.count(ChainLD->getChain().getNode()))
+ Queue.push_back(ChainLD->getChain().getNode());
+ } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+ for (SDNode::op_iterator O = ChainNext->op_begin(),
+ OE = ChainNext->op_end(); O != OE; ++O)
+ if (!Visited.count(O->getNode()))
+ Queue.push_back(O->getNode());
+ } else
+ LoadRoots.insert(ChainNext);
+ }
+
+ // Second, search down the chain, starting from the top-level nodes recorded
+ // in the first phase. These top-level nodes are the nodes just above all
+ // loads and token factors. Starting with their uses, recursively look though
+ // all loads (just the chain uses) and token factors to find a consecutive
+ // load.
+ Visited.clear();
+ Queue.clear();
+
+ for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+ IE = LoadRoots.end(); I != IE; ++I) {
+ Queue.push_back(*I);
+
+ while (!Queue.empty()) {
+ SDNode *LoadRoot = Queue.pop_back_val();
+ if (!Visited.insert(LoadRoot))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+ UE = LoadRoot->use_end(); UI != UE; ++UI)
+ if (((isa<LoadSDNode>(*UI) &&
+ cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+ Queue.push_back(*UI);
+ }
+ }
+
+ return false;
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
case PPCISD::SHL:
@@ -6868,7 +7025,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
@@ -6881,7 +7038,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
N->getOperand(1).getOperand(1));
DCI.AddToWorklist(RV.getNode());
@@ -6909,8 +7066,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0)
+ if (RV.getNode() != 0) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
return RV;
+ }
}
}
@@ -6999,6 +7176,160 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
cast<StoreSDNode>(N)->getMemOperand());
}
break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT VT = LD->getValueType(0);
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+ if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
+ TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ LD->getAlignment() < ABIAlignment) {
+ // This is a type-legal unaligned Altivec load.
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // This implements the loading of unaligned vectors as described in
+ // the venerable Apple Velocity Engine overview. Specifically:
+ // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+ // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+ //
+ // The general idea is to expand a sequence of one or more unaligned
+ // loads into a alignment-based permutation-control instruction (lvsl),
+ // a series of regular vector loads (which always truncate their
+ // input address to an aligned address), and a series of permutations.
+ // The results of these permutations are the requested loaded values.
+ // The trick is that the last "extra" load is not taken from the address
+ // you might suspect (sizeof(vector) bytes after the last requested
+ // load), but rather sizeof(vector) - 1 bytes after the last
+ // requested vector. The point of this is to avoid a page fault if the
+ // base address happend to be aligned. This works because if the base
+ // address is aligned, then adding less than a full vector length will
+ // cause the last vector in the sequence to be (re)loaded. Otherwise,
+ // the next vector will be fetched as you might suspect was necessary.
+
+ // We might be able to reuse the permutation generation from
+ // a different base address offset from this one by an aligned amount.
+ // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+ // optimization later.
+ SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
+ DAG, dl, MVT::v16i8);
+
+ // Refine the alignment of the original load (a "new" load created here
+ // which was identical to the first except for the alignment would be
+ // merged with the existing node regardless).
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(LD->getPointerInfo(),
+ LD->getMemOperand()->getFlags(),
+ LD->getMemoryVT().getStoreSize(),
+ ABIAlignment);
+ LD->refineAlignment(MMO);
+ SDValue BaseLoad = SDValue(LD, 0);
+
+ // Note that the value of IncOffset (which is provided to the next
+ // load's pointer info offset value, and thus used to calculate the
+ // alignment), and the value of IncValue (which is actually used to
+ // increment the pointer value) are different! This is because we
+ // require the next load to appear to be aligned, even though it
+ // is actually offset from the base pointer by a lesser amount.
+ int IncOffset = VT.getSizeInBits() / 8;
+ int IncValue = IncOffset;
+
+ // Walk (both up and down) the chain looking for another load at the real
+ // (aligned) offset (the alignment of the other load does not matter in
+ // this case). If found, then do not use the offset reduction trick, as
+ // that will prevent the loads from being later combined (as they would
+ // otherwise be duplicates).
+ if (!findConsecutiveLoad(LD, DAG))
+ --IncValue;
+
+ SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+ SDValue ExtraLoad =
+ DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncOffset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), ABIAlignment);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+ if (BaseLoad.getValueType() != MVT::v4i32)
+ BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
+
+ if (ExtraLoad.getValueType() != MVT::v4i32)
+ ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
+
+ SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+ if (VT != MVT::v4i32)
+ Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+
+ // Now we need to be really careful about how we update the users of the
+ // original load. We cannot just call DCI.CombineTo (or
+ // DAG.ReplaceAllUsesWith for that matter), because the load still has
+ // uses created here (the permutation for example) that need to stay.
+ SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ while (UI != UE) {
+ SDUse &Use = UI.getUse();
+ SDNode *User = *UI;
+ // Note: BaseLoad is checked here because it might not be N, but a
+ // bitcast of N.
+ if (User == Perm.getNode() || User == BaseLoad.getNode() ||
+ User == TF.getNode() || Use.getResNo() > 1) {
+ ++UI;
+ continue;
+ }
+
+ SDValue To = Use.getResNo() ? TF : Perm;
+ ++UI;
+
+ SmallVector<SDValue, 8> Ops;
+ for (SDNode::op_iterator O = User->op_begin(),
+ OE = User->op_end(); O != OE; ++O) {
+ if (*O == Use)
+ Ops.push_back(To);
+ else
+ Ops.push_back(*O);
+ }
+
+ DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+ }
+
+ return SDValue(N, 0);
+ }
+ }
+ break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
+ Intrinsic::ppc_altivec_lvsl &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
+ SDValue Add = N->getOperand(1);
+
+ if (DAG.MaskedValueIsZero(Add->getOperand(1),
+ APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+ Intrinsic::ppc_altivec_lvsl) {
+ // We've found another LVSL, and this address if an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
+ }
+ }
+ }
+
+ break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7083,20 +7414,53 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
- // If the user is a MFCR instruction, we know this is safe. Otherwise we
- // give up for right now.
- if (FlagUser->getOpcode() == PPCISD::MFCR)
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPoNode, 0);
}
break;
}
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
- // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+ // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
+ // value. If so, pass-through the AND to get to the intrinsic.
+ if (LHS.getOpcode() == ISD::AND &&
+ LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
+ isZero())
+ LHS = LHS.getOperand(0);
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(RHS)) {
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ "Counter decrement comparison is not EQ or NE");
+
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+ (CC == ISD::SETNE && !Val);
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+ assert(LHS.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
int CompareOpc;
bool isDot;
@@ -7271,7 +7635,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
@@ -7296,7 +7660,24 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first)) {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::G8RCRegClass),
+ &PPC::G8RCRegClass);
+ }
+
+ return R;
}
@@ -7406,25 +7787,13 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode for load / store of the
-/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
- // PPC allows a sign-extended 16-bit immediate field.
- return (V > -(1 << 16) && V < (1 << 16)-1);
-}
-
-bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
- return false;
-}
-
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// Make sure the function does not optimize away the store of the RA to
@@ -7454,7 +7823,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -7537,18 +7906,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return true;
}
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
- case MVT::v4f32:
return true;
default:
break;
@@ -7558,9 +7924,15 @@ bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref)
+ if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
}
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}
OpenPOWER on IntegriCloud