Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp          |  82
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp             | 299
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp |  32
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp          | 287
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp |  31
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h          |   3
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp    |  49
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  | 162
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp      |  17
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp    | 570
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp   |  69
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h     |   6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp         |  24
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  | 102
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h    |   6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp     | 160
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp  |   3
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp       | 101
18 files changed, 1471 insertions, 532 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9cc70a3..f427511 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -319,6 +319,10 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorkList(N);
}
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
@@ -1290,6 +1294,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
return SDValue();
}
+/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
+/// return 0 or 1 depending on the carry flag.
+static bool isCarryMaterialization(SDValue V) {
+ if (V.getOpcode() != ISD::ADDE)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
+ return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1453,6 +1467,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
+ // add (adde 0, 0, glue), X -> adde X, 0, glue
+ if (N0->hasOneUse() && isCarryMaterialization(N0))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
+ DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
+ N0.getOperand(2));
+
+ // add X, (adde 0, 0, glue) -> adde X, 0, glue
+ if (N1->hasOneUse() && isCarryMaterialization(N1))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
+ DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
+ N1.getOperand(2));
+
return SDValue();
}
@@ -1496,6 +1522,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
N->getDebugLoc(), MVT::Glue));
}
+ // addc (adde 0, 0, glue), X -> adde X, 0, glue
+ if (N0->hasOneUse() && isCarryMaterialization(N0))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
+ DAG.getConstant(0, VT), N0.getOperand(2));
+
+ // addc X, (adde 0, 0, glue) -> adde X, 0, glue
+ if (N1->hasOneUse() && isCarryMaterialization(N1))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
+ DAG.getConstant(0, VT), N1.getOperand(2));
+
return SDValue();
}
@@ -1506,6 +1542,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ // If both operands are null we know that carry out will always be false.
+ if (N0C && N0C->isNullValue() && N0 == N1)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(),
+ MVT::Glue));
+
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
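Note on the carry combines above: adde 0, 0, glue computes 0 + 0 + carry, i.e. it materializes the carry bit as a 0/1 value, so feeding that value into a plain add (or addc) is the same as performing the add-with-carry directly, and an adde of two zero operands can never produce a carry-out. A minimal scalar sketch of the identity (plain C++ with a hypothetical adde() helper, not code from this patch):

    #include <cassert>
    #include <cstdint>

    // Scalar model of ISD::ADDE: add with carry-in.
    static uint32_t adde(uint32_t a, uint32_t b, bool carry_in) {
      return a + b + (carry_in ? 1u : 0u);
    }

    int main() {
      for (int c = 0; c <= 1; ++c) {
        uint32_t x = 0xDEADBEEF;
        uint32_t materialized = adde(0, 0, c);     // "adde 0, 0, glue" yields c
        assert(x + materialized == adde(x, 0, c)); // the add -> adde rewrite
      }
      return 0;
    }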
@@ -3281,8 +3323,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
- N0.getOperand(0), N1);
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
}
@@ -3688,7 +3732,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (load x)) -> (sext (truncate (sextload x)))
// None of the supported targets knows how to perform load and sign extend
- // in one instruction. We only perform this transformation on scalars.
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
@@ -3839,7 +3884,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
- return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3892,7 +3937,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (load x)) -> (zext (truncate (zextload x)))
// None of the supported targets knows how to perform load and vector_zext
- // in one instruction. We only perform this transformation on scalar zext.
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
@@ -4066,7 +4112,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
- return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -4101,7 +4147,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
- // in one instruction. We only perform this transformation on scalars.
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
@@ -4514,7 +4561,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
- // Currenly we only perform this optimization on scalars because vectors
+ // Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
SDValue Shorter =
@@ -5101,7 +5148,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128)
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -5123,7 +5172,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128)
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -5817,8 +5868,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// value.
// TODO: Handle store large -> read small portion.
// TODO: Handle TRUNCSTORE/LOADEXT
- if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
- !LD->isVolatile()) {
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
if (ISD::isNON_TRUNCStore(Chain.getNode())) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
@@ -6217,6 +6267,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->isNonTemporal(), OrigAlign);
}
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
// NOTE: If the original store is volatile, this transform must not increase
@@ -6250,8 +6304,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
- } else if (!ST->isVolatile() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
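The restructured branch above preserves two long-standing store folds: an FP store is rewritten as an integer store of the same bit pattern, and on targets without a legal i64 store the 64-bit value is split into two 32-bit stores. A scalar sketch of the split (assumes little-endian layout; illustrative only, not code from the patch):

    #include <cstdint>
    #include <cstring>

    // "store double" lowered as two 32-bit integer stores of the same bits.
    void store_f64_as_two_i32(double V, uint32_t *P) {
      uint64_t Bits;
      std::memcpy(&Bits, &V, sizeof(Bits));     // reinterpret, don't convert
      P[0] = static_cast<uint32_t>(Bits);       // low word
      P[1] = static_cast<uint32_t>(Bits >> 32); // high word
    }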
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 490b857..3af9482 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -121,10 +122,9 @@ unsigned FastISel::getRegForValue(const Value *V) {
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end()) {
- unsigned Reg = I->second;
- return Reg;
- }
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+
unsigned Reg = LocalValueMap[V];
if (Reg != 0)
return Reg;
@@ -164,8 +164,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
Reg =
getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- // Try to emit the constant directly.
- Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
if (!Reg) {
// Try to emit the constant by using an integer constant with a cast.
@@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
return false;
}
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+
unsigned Op0 = getRegForValue(I->getOperand(0));
- if (Op0 == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// Check if the second operand is a constant and handle it appropriately.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op0IsKill,
- CI->getZExtValue());
- if (ResultReg != 0) {
- // We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
- return true;
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
}
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
}
// Check if the second operand is a constant float.
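Two separate improvements land in this hunk. First, a constant in operand 0 of a commutative instruction is now handled through the register-immediate path, since at -O0 nothing has canonicalized constants to the right-hand side. Second, "sdiv exact X, C" with C a power of two becomes an arithmetic shift: 'exact' asserts the division leaves no remainder, which is precisely the case where the shift agrees with signed division even for negative X (without it, -7 / 8 is 0 while -7 >> 3 is -1). A sketch of that equivalence (not FastISel code):

    #include <cassert>
    #include <cstdint>

    // sdiv exact X, 8  ->  sra X, 3 (valid only because X % 8 == 0).
    static int32_t sdiv_exact_by_8(int32_t X) {
      assert(X % 8 == 0 && "'exact' asserts divisibility");
      return X >> 3;
    }

    int main() {
      assert(sdiv_exact_by_8(-64) == -64 / 8); // -8 either way
      assert(sdiv_exact_by_8(48) == 6);
      return 0;
    }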
@@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) {
}
bool FastISel::SelectCall(const User *I) {
- const Function *F = cast<CallInst>(I)->getCalledFunction();
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
- unsigned IID = F->getIntrinsicID();
- switch (IID) {
+ switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::dbg_declare: {
- const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
!FuncInfo.MF->getMMI().hasDebugInfo())
return true;
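The inline-asm fast path added at the top of SelectCall only fires for asms with an empty constraint string, i.e. no outputs, inputs, or clobbers, so the whole call reduces to a single INLINEASM machine instruction. In source terms it covers cases like this (illustrative C++, not from the patch):

    // Constraint-free inline asm: nothing to allocate, nothing to tie.
    void cpu_hint() {
      asm volatile("nop");
    }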
@@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) {
}
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
- const DbgValueInst *DI = cast<DbgValueInst>(I);
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
if (!V) {
@@ -523,65 +575,58 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(I->getType());
- switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
- default: break;
- case TargetLowering::Expand: {
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(I, ResultReg);
- return true;
- }
- }
- break;
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+ break;
+
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+ UpdateValueMap(Call, ResultReg);
+ return true;
}
case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(I->getType());
- switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
- default: break;
- case TargetLowering::Expand: {
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+ break;
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+ else {
#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+ FuncInfo.CatchInfoLost.insert(Call);
#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(I);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
- UpdateValueMap(I, ResultReg);
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+
+ bool ResultRegIsKill = hasTrivialKill(Call);
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg, ResultRegIsKill);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
- return true;
- }
- }
- break;
+ UpdateValueMap(Call, ResultReg);
+
+ return true;
}
}
@@ -966,59 +1011,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
unsigned Op0, bool Op0IsKill,
uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed), check to make sure shift amounts are
+ // in-range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
// First check if immediate type is legal. If not, we can't use the ri form.
unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
if (ResultReg != 0)
return ResultReg;
unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
- if (MaterialReg == 0)
- return 0;
- return FastEmit_rr(VT, VT, Opcode,
- Op0, Op0IsKill,
- MaterialReg, /*Kill=*/true);
-}
-
-/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
-/// to emit an instruction with a floating-point immediate operand using
-/// FastEmit_rf. If that fails, it materializes the immediate into a register
-/// and try FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm, MVT ImmType) {
- // First check if immediate type is legal. If not, we can't use the rf form.
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
- if (ResultReg != 0)
- return ResultReg;
-
- // Materialize the constant in a register.
- unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
if (MaterialReg == 0) {
- // If the target doesn't have a way to directly enter a floating-point
- // value into a register, use an alternate approach.
- // TODO: The current approach only supports floating-point constants
- // that can be constructed by conversion from integer values. This should
- // be replaced by code that creates a load from a constant-pool entry,
- // which will require some target-specific work.
- const APFloat &Flt = FPImm->getValueAPF();
- EVT IntVT = TLI.getPointerTy();
-
- uint64_t x[2];
- uint32_t IntBitWidth = IntVT.getSizeInBits();
- bool isExact;
- (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
- if (!isExact)
- return 0;
- APInt IntVal(IntBitWidth, 2, x);
-
- unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::Constant, IntVal.getZExtValue());
- if (IntegerReg == 0)
- return 0;
- MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
- ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
- if (MaterialReg == 0)
- return 0;
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
}
return FastEmit_rr(VT, VT, Opcode,
Op0, Op0IsKill,
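FastEmit_ri_ now strength-reduces before trying the immediate form: a multiply by a power of two becomes a left shift and an unsigned divide becomes a right shift, with an explicit guard rejecting shift amounts of the value's width or more (such shifts are undefined). If the immediate form still fails, the constant is materialized through getRegForValue rather than giving up, keeping selection inside fast-isel. A scalar check of the two reductions (sketch; the real code uses LLVM's isPowerOf2_64/Log2_64 helpers):

    #include <cassert>
    #include <cstdint>

    static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }

    int main() {
      uint64_t X = 1234567;
      assert(isPow2(16));
      assert(X * 16 == (X << 4)); // mul X, 16  -> shl X, 4
      assert(X / 16 == (X >> 4)); // udiv X, 16 -> srl X, 4
      return 0;
    }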
@@ -1099,6 +1118,29 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
@@ -1160,6 +1202,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx) {
@@ -1215,7 +1274,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Only handle legal types. Two interesting things to note here. First,
// by bailing out early, we may leave behind some dead instructions,
// since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
- // own moves. Second, this check is necessary becuase FastISel doesn't
+ // own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 2ae3286..d8a5770 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -448,16 +448,30 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
+void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end();
- I != E; ++I)
- if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to DestBB.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+ SmallPtrSet<const BasicBlock*, 4> Visited;
+
+ // The 'eh.selector' call may not be in the direct successor of a basic block,
+ // but could be several successors deeper. If we don't find it, try going one
+ // level further. <rdar://problem/8824861>
+ while (Visited.insert(SuccBB)) {
+ for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
+ I != E; ++I)
+ if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to LPad.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
#ifndef NDEBUG
- if (!FLI.MBBMap[SrcBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
+ if (!FLI.MBBMap[SuccBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
#endif
- }
+ return;
+ }
+
+ const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
+ if (Br && Br->isUnconditional())
+ SuccBB = Br->getSuccessor(0);
+ else
+ break;
+ }
}
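The rewritten CopyCatchInfo no longer assumes the 'eh.selector' call sits in the immediate successor: it follows chains of unconditional branches, with a visited set so a branch cycle terminates the walk. The shape of that search, over a toy block type rather than LLVM's BasicBlock (sketch):

    #include <set>

    struct Block {
      bool HasSelector = false;
      Block *UncondSucc = nullptr; // set only for an unconditional branch
    };

    // Follow unconditional successors until a selector is found or we loop.
    static Block *findSelectorBlock(Block *BB) {
      std::set<Block *> Visited;
      while (Visited.insert(BB).second) {
        if (BB->HasSelector) return BB;
        if (!BB->UncondSucc) break;
        BB = BB->UncondSucc;
      }
      return nullptr;
    }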
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f08528f..2b6c56e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -61,10 +61,10 @@ class SelectionDAGLegalize {
// Libcall insertion helpers.
- /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been
/// legalized. We use this to ensure that calls are properly serialized
/// against each other, including inserted libcalls.
- SDValue LastCALLSEQ_END;
+ SmallVector<SDValue, 8> LastCALLSEQ;
enum LegalizeAction {
Legal, // The target natively supports this operation.
@@ -142,6 +142,9 @@ private:
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -153,6 +156,7 @@ private:
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -178,6 +182,15 @@ private:
void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); }
+ void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+ void pushLastCALLSEQ(SDValue s) {
+ LastCALLSEQ.push_back(s);
+ }
+ void popLastCALLSEQ() {
+ LastCALLSEQ.pop_back();
+ }
};
}
@@ -223,7 +236,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
}
void SelectionDAGLegalize::LegalizeDAG() {
- LastCALLSEQ_END = DAG.getEntryNode();
+ pushLastCALLSEQ(DAG.getEntryNode());
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
@@ -251,14 +264,15 @@ void SelectionDAGLegalize::LegalizeDAG() {
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- // Nested CALLSEQ_START/END constructs aren't yet legal,
- // but we can DTRT and handle them correctly here.
+ int next_depth = depth;
if (Node->getOpcode() == ISD::CALLSEQ_START)
- depth++;
- else if (Node->getOpcode() == ISD::CALLSEQ_END) {
- depth--;
- if (depth == 0)
+ next_depth = depth + 1;
+ if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ assert(depth > 0 && "negative depth!");
+ if (depth == 1)
return Node;
+ else
+ next_depth = depth - 1;
}
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -289,7 +303,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, depth))
+ if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
return Result;
}
return 0;
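With nested call sequences now legal, FindCallEndFromCallStart threads an explicit depth through the recursion so each CALLSEQ_END is matched to its own CALLSEQ_START, exactly like bracket matching. The same rule over a string of parentheses (sketch, not DAG code):

    #include <cassert>
    #include <cstddef>

    // Index of the ')' matching the '(' at Start, or -1 if unmatched.
    static size_t findMatch(const char *S, size_t Start) {
      int Depth = 0;
      for (size_t i = Start; S[i]; ++i) {
        if (S[i] == '(') ++Depth;
        else if (S[i] == ')' && --Depth == 0) return i;
      }
      return size_t(-1);
    }

    int main() {
      assert(findMatch("(()())", 0) == 5); // outer pair
      assert(findMatch("(()())", 1) == 2); // first nested pair
      return 0;
    }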
@@ -786,7 +800,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
}
}
}
- return SDValue();
+ return SDValue(0, 0);
}
/// LegalizeOp - We know that the specified value has a legal type, and
@@ -934,11 +948,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::BR_JT:
case ISD::BR_CC:
case ISD::BRCOND:
- // Branches tweak the chain to include LastCALLSEQ_END
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ // Branches tweak the chain to include LastCALLSEQ
Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
+ getLastCALLSEQ());
Ops[0] = LegalizeOp(Ops[0]);
- LastCALLSEQ_END = DAG.getEntryNode();
+ setLastCALLSEQ(DAG.getEntryNode());
break;
case ISD::SHL:
case ISD::SRL:
@@ -948,7 +963,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[1]));
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
@@ -956,7 +972,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[2]));
break;
}
@@ -1024,8 +1041,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case ISD::CALLSEQ_START: {
- static int depth = 0;
SDNode *CallEnd = FindCallEndFromCallStart(Node);
+ assert(CallEnd && "didn't find CALLSEQ_END!");
// Recursively Legalize all of the inputs of the call end that do not lead
// to this call start. This ensures that any libcalls that need be inserted
@@ -1042,9 +1059,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
+ if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, LastCALLSEQ_END);
+ Tmp1, getLastCALLSEQ());
Tmp1 = LegalizeOp(Tmp1);
}
@@ -1065,29 +1082,28 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
-
- SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ;
// Note that we are selecting this call!
- LastCALLSEQ_END = SDValue(CallEnd, 0);
+ setLastCALLSEQ(SDValue(CallEnd, 0));
- depth++;
// Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(LastCALLSEQ_END);
- depth--;
- assert(depth >= 0 && "Un-matched CALLSEQ_START?");
- if (depth > 0)
- LastCALLSEQ_END = Saved_LastCALLSEQ_END;
+ LegalizeOp(getLastCALLSEQ());
return Result;
}
case ISD::CALLSEQ_END:
- // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
- // will cause this node to be legalized as well as handling libcalls right.
- if (LastCALLSEQ_END.getNode() != Node) {
- LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
+ {
+ SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
+
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+ // will cause this node to be legalized as well as handling libcalls right.
+ if (getLastCALLSEQ().getNode() != Node) {
+ LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
}
// Otherwise, the call start has been legalized and everything is going
@@ -1116,6 +1132,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
}
// This finishes up call legalization.
+ popLastCALLSEQ();
+
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
if (Node->getNumValues() == 2)
@@ -2035,9 +2053,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return DAG.getRoot();
// Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
+
return CallInfo.first;
}
@@ -2072,7 +2124,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Callee, Args, DAG, Node->getDebugLoc());
// Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
return CallInfo;
@@ -2112,6 +2164,113 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if (User->getOpcode() == OtherOpcode &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the return address of the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ DebugLoc dl = Node->getDebugLoc();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
+ MachinePointerInfo(), false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
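UseDivRem keeps the new path profitable: the combined libcall is only issued when another node computes the matching quotient/remainder of the same operands, so one call replaces two. The call shape ExpandDivRemLibCall targets returns the quotient directly and stores the remainder through the extra pointer argument (the stack temporary loaded back above). A sketch of that ABI, modeled on compiler-rt's __divmodsi4 (hypothetical local name):

    #include <cassert>
    #include <cstdint>

    // Quotient returned, remainder written through 'rem'.
    static int32_t my_divmodsi4(int32_t a, int32_t b, int32_t *rem) {
      *rem = a % b;
      return a / b;
    }

    int main() {
      int32_t r;
      int32_t q = my_divmodsi4(-7, 3, &r);
      assert(q == -2 && r == -1);  // C-style truncating division
      assert(q * 3 + r == -7);
      return 0;
    }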
+
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
@@ -2759,7 +2918,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
}
case ISD::FP_ROUND_INREG: {
// The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targetting a temporary location (a stack slot).
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
// NOTE: there is a choice here between constantly creating new stack
// slots and always reusing the same one. We currently always create
@@ -3085,24 +3244,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned) {
+ } else if (isSigned)
Tmp1 = ExpandIntLibCall(Node, true,
RTLIB::SREM_I8,
RTLIB::SREM_I16, RTLIB::SREM_I32,
RTLIB::SREM_I64, RTLIB::SREM_I128);
- } else {
+ else
Tmp1 = ExpandIntLibCall(Node, false,
RTLIB::UREM_I8,
RTLIB::UREM_I16, RTLIB::UREM_I32,
RTLIB::UREM_I64, RTLIB::UREM_I128);
- }
Results.push_back(Tmp1);
break;
}
@@ -3112,7 +3272,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, true)))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
@@ -3141,6 +3303,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Tmp1.getValue(1));
break;
}
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3225,6 +3392,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::UMULO:
case ISD::SMULO: {
EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue BottomHalf;
@@ -3242,7 +3410,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TopHalf = BottomHalf.getValue(1);
} else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() * 2))) {
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
@@ -3255,7 +3422,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// have a libcall big enough.
// Also, we can fall back to a division in some cases, but that's a big
// performance hit in the general case.
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (WideVT == MVT::i16)
LC = RTLIB::MUL_I16;
@@ -3266,15 +3432,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
else if (WideVT == MVT::i128)
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
- LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
- RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
-
- SDValue Ret = ExpandLibCall(LC, Node, isSigned);
- BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
- TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
- DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
- TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
+
+ // The high part is obtained by SRA'ing all but one of the bits of low
+ // part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture and thus has to be split to
+ // two arguments.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
}
+
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
TLI.getShiftAmountTy(BottomHalf.getValueType()));
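Because WideVT is not a legal type here, each double-width multiplicand is passed as two legal-width arguments, and the high half of a sign-extended value is just the sign bit replicated, which the SRA by LoSize-1 produces. That identity for i32 -> i64 (plain C++, not DAG code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t X = -5;
      int32_t Hi = X >> 31; // all ones if negative, zero otherwise
      uint64_t Wide = (uint64_t(uint32_t(Hi)) << 32) | uint32_t(X);
      assert(int64_t(Wide) == int64_t(X)); // matches plain sign extension
      return 0;
    }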
@@ -3409,7 +3587,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- LastCALLSEQ_END = DAG.getEntryNode();
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ setLastCALLSEQ(DAG.getEntryNode());
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f0752df..935aab0 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1051,8 +1051,6 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
- case ISD::UMULO:
- case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1428,9 +1426,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
}
- return;
+ return;
}
-
+
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
@@ -2128,31 +2126,6 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ofl);
}
-void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- DebugLoc dl = N->getDebugLoc();
- EVT VT = N->getValueType(0);
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2);
- // Expand the result by simply replacing it with the equivalent
- // non-overflow-checking operation.
- SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
- SplitInteger(Ret, Lo, Hi);
-
- // Now calculate overflow.
- SDValue Ofl;
- if (N->getOpcode() == ISD::UMULO)
- Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi,
- DAG.getConstant(0, VT), ISD::SETNE);
- else {
- SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT);
- Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp);
- Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE);
- }
- ReplaceValueWith(SDValue(N, 1), Ofl);
-}
-
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3f81bbb..5409b88 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -348,7 +348,6 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
@@ -523,6 +522,7 @@ private:
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -566,7 +566,6 @@ private:
void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 167dbe0..5d0f923 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -58,6 +58,9 @@ class VectorLegalizer {
SDValue UnrollVSETCC(SDValue Op);
// Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
// isn't legal.
+ // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+ // SINT_TO_FLOAT and SHR on vectors isn't legal.
+ SDValue ExpandUINT_TO_FLOAT(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -207,7 +210,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::FNEG)
+ if (Node->getOpcode() == ISD::UINT_TO_FP)
+ Result = ExpandUINT_TO_FLOAT(Op);
+ else if (Node->getOpcode() == ISD::FNEG)
Result = ExpandFNEG(Op);
else if (Node->getOpcode() == ISD::VSETCC)
Result = UnrollVSETCC(Op);
@@ -251,6 +256,48 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+
+
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+ uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
+
+ // Clear upper part of LO, lower HI
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+
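ExpandUINT_TO_FLOAT above works per lane as follows: the word is split into two half-width pieces, each piece is nonnegative and small enough to convert exactly with a signed conversion, and the result is recombined as hi * 2^(BW/2) + lo. The same computation for a single u64 lane (sketch; assumes round-to-nearest):

    #include <cassert>
    #include <cstdint>

    static double uint64_to_double(uint64_t X) {
      int64_t Hi = int64_t(X >> 32);           // upper half, < 2^32
      int64_t Lo = int64_t(X & 0xFFFFFFFFull); // lower half, < 2^32
      return double(Hi) * 4294967296.0 + double(Lo); // hi * 2^32 + lo
    }

    int main() {
      uint64_t X = 0x8000000000000001ull; // negative if read as int64
      assert(uint64_to_double(X) == double(X));
      return 0;
    }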
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 182f8fc..0b4dd35 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
@@ -63,27 +64,33 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
+ case ISD::ANY_EXTEND:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::FABS:
+ case ISD::FCEIL:
case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
case ISD::FNEG:
+ case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FRINT:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
- case ISD::FFLOOR:
- case ISD::FCEIL:
- case ISD::FRINT:
- case ISD::FNEARBYINT:
- case ISD::UINT_TO_FP:
+ case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
R = ScalarizeVecRes_UnaryOp(N);
break;
@@ -145,6 +152,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
N->getOperand(0), N->getOperand(1));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ NewVT, Op, N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
@@ -405,11 +419,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
-
case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
- case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
@@ -427,32 +439,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
- case ISD::CTTZ:
+ case ISD::ANY_EXTEND:
+ case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
case ISD::CTPOP:
- case ISD::FNEG:
+ case ISD::CTTZ:
case ISD::FABS:
- case ISD::FSQRT:
- case ISD::FSIN:
+ case ISD::FCEIL:
case ISD::FCOS:
- case ISD::FTRUNC:
+ case ISD::FEXP:
+ case ISD::FEXP2:
case ISD::FFLOOR:
- case ISD::FCEIL:
- case ISD::FRINT:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- case ISD::FEXP:
- case ISD::FEXP2:
- case ISD::FLOG:
- case ISD::FLOG2:
- case ISD::FLOG10:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -587,60 +602,6 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
}
-void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- EVT LoVT, HiVT;
- DebugLoc dl = N->getDebugLoc();
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
-
- SDValue DTyOpLo = DAG.getValueType(LoVT);
- SDValue DTyOpHi = DAG.getValueType(HiVT);
-
- SDValue RndOp = N->getOperand(3);
- SDValue SatOp = N->getOperand(4);
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
-
- // Split the input.
- SDValue VLo, VHi;
- EVT InVT = N->getOperand(0).getValueType();
- switch (getTypeAction(InVT)) {
- default: llvm_unreachable("Unexpected type action!");
- case Legal: {
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getIntPtrConstant(0));
- VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case SplitVector:
- GetSplitVector(N->getOperand(0), VLo, VHi);
- break;
- case WidenVector: {
- // If the result needs to be split and the input needs to be widened,
- // the two types must have different lengths. Use the widened result
- // and extract from it to do the split.
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- }
-
- SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
- SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
-
- Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
- CvtCode);
- Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
- CvtCode);
-}
-
void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -840,8 +801,25 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
}
}
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
}
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
@@ -989,11 +967,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
+ case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- case ISD::FP_EXTEND:
case ISD::FTRUNC:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
@@ -1270,15 +1248,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Shift(N);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
Res = WidenVecRes_Convert(N);
break;
@@ -1286,15 +1265,20 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::FABS:
+ case ISD::FCEIL:
case ISD::FCOS:
- case ISD::FNEG:
- case ISD::FSIN:
- case ISD::FSQRT:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FFLOOR:
case ISD::FLOG:
- case ISD::FLOG2:
case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
Res = WidenVecRes_Unary(N);
break;
}
@@ -2004,7 +1988,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
- case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index e3da208..7b560d1 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -570,13 +570,20 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it's
+ // possible to copy the value, but it requires cross register class copies
+ // and it is expensive.
+ // If the cross copy register class is null, then it's not possible to copy
+ // the value at all.
SUnit *NewDef = 0;
- if (DestRC)
+ if (DestRC != RC) {
NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
if (!NewDef) {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
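The same three-way decision recurs below in ScheduleDAGRRList::PickNodeToScheduleBottomUp; distilled as a standalone sketch (the enum and helper are illustrative, not part of the patch):

    // Illustrative summary of how TRI->getCrossCopyRegClass(RC) is now used.
    enum class LiveRegFix { DirectCopy, DuplicateDef, CrossClassCopy, Fatal };

    LiveRegFix classify(const TargetRegisterClass *RC,
                        const TargetRegisterClass *DestRC, bool Duplicated) {
      if (DestRC == RC) return LiveRegFix::DirectCopy;     // copy value as-is
      if (Duplicated)   return LiveRegFix::DuplicateDef;   // cheapest general fix
      if (DestRC)       return LiveRegFix::CrossClassCopy; // legal but expensive
      return LiveRegFix::Fatal; // no way to satisfy the physreg dependency
    }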
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 0b548b2..88bd450 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -70,6 +70,50 @@ static cl::opt<bool> DisableSchedCycles(
"disable-sched-cycles", cl::Hidden, cl::init(false),
cl::desc("Disable cycle-level precision during preRA scheduling"));
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+#ifndef NDEBUG
+namespace {
+ // For sched=list-ilp, count the number of times each factor comes into play.
+ enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
+ FactStatic, FactOther, NumFactors };
+}
+static const char *FactorName[NumFactors] =
+{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+static int FactorCount[NumFactors];
+#endif //!NDEBUG
+
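Since these are ordinary cl::opt flags, individual heuristics can be isolated from the llc command line; an illustrative invocation:

    $ llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure \
          -max-sched-reorder=3 test.ll -o test.s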
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -103,6 +147,10 @@ private:
/// MinAvailableCycle - Cycle of the soonest available instruction.
unsigned MinAvailableCycle;
+ /// IssueCount - Count instructions issued in this cycle.
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
/// LiveRegDefs - A set of physical registers and their definitions
/// that are "live". These nodes must be scheduled before any other nodes that
/// modify the registers can be scheduled.
@@ -234,8 +282,14 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ FactorCount[i] = 0;
+ }
+#endif //!NDEBUG
CurCycle = 0;
+ IssueCount = 0;
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -258,6 +312,11 @@ void ScheduleDAGRRList::Schedule() {
else
ListScheduleTopDown();
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
+ }
+#endif // !NDEBUG
AvailableQueue->releaseState();
}
@@ -295,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
if (Height < MinAvailableCycle)
MinAvailableCycle = Height;
- if (isReady(SU)) {
+ if (isReady(PredSU)) {
AvailableQueue->push(PredSU);
}
// CapturePred and others may have left the node in the pending queue, avoid
@@ -383,6 +442,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
if (NextCycle <= CurCycle)
return;
+ IssueCount = 0;
AvailableQueue->setCurCycle(NextCycle);
if (!HazardRec->isEnabled()) {
// Bypass lots of virtual calls in case of long latency.
@@ -407,6 +467,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
if (DisableSchedCycles)
return;
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
// Bump CurCycle to account for latency. We assume the latency of other
@@ -477,6 +544,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
}
}
+static void resetVRegCycle(SUnit *SU);
+
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
@@ -486,12 +555,13 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
- DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
#endif
// FIXME: Do not modify node height. It may interfere with
// backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
- // node it's ready cycle can aid heuristics, and after scheduling it can
+ // node its ready cycle can aid heuristics, and after scheduling it can
// indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
@@ -502,6 +572,12 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
AvailableQueue->ScheduledNode(SU);
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
// Update liveness of predecessors before successors to avoid treating a
// two-address node as a live range def.
ReleasePredecessors(SU);
@@ -518,16 +594,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
}
+ resetVRegCycle(SU);
+
SU->isScheduled = true;
// Conditions under which the scheduler should eagerly advance the cycle:
// (1) No available instructions
// (2) All pipelines full, so available instructions must have hazards.
//
- // If HazardRec is disabled, count each inst as one cycle.
- if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
- || AvailableQueue->empty())
- AdvanceToCycle(CurCycle + 1);
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
}
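IssueCount together with AvgIPC amounts to a simple issue-width model; a toy standalone version of the logic above (all names local to this sketch):

    // Advance the cycle once AvgIPC machine instructions have issued,
    // mirroring AdvanceToCycle's reset of IssueCount.
    unsigned CurCycle = 0, IssueCount = 0;
    const unsigned AvgIPC = 2; // would come from -sched-avg-ipc

    void issueOne(bool IsMachineOp) {
      if (IsMachineOp)
        ++IssueCount;
      if (IssueCount == AvgIPC) {
        ++CurCycle;     // start a new cycle...
        IssueCount = 0; // ...with a fresh issue budget
      }
    }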
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -872,6 +957,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
AddPred(SuccSU, D);
DelDeps.push_back(std::make_pair(SuccSU, *I));
}
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true);
+ AddPred(SuccSU, D);
+ }
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
@@ -1077,13 +1171,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it's
+ // possible to copy the value, but it requires cross register class
+ // copies and it is expensive.
+ // If the cross copy register class is null, then it's not possible to
+ // copy the value at all.
SUnit *NewDef = 0;
- if (DestRC)
+ if (DestRC != RC) {
NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
if (!NewDef) {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
@@ -1139,7 +1239,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- DEBUG(dbgs() << "\n*** Examining Available\n";
+ DEBUG(dbgs() << "\nExamining Available:\n";
AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
@@ -1318,7 +1418,7 @@ struct src_ls_rr_sort : public queue_sort {
struct hybrid_ls_rr_sort : public queue_sort {
enum {
IsBottomUp = true,
- HasReadyFilter = true
+ HasReadyFilter = false
};
RegReductionPQBase *SPQ;
@@ -1337,7 +1437,7 @@ struct hybrid_ls_rr_sort : public queue_sort {
struct ilp_ls_rr_sort : public queue_sort {
enum {
IsBottomUp = true,
- HasReadyFilter = true
+ HasReadyFilter = false
};
RegReductionPQBase *SPQ;
@@ -1395,7 +1495,7 @@ public:
std::fill(RegPressure.begin(), RegPressure.end(), 0);
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
}
}
@@ -1422,6 +1522,8 @@ public:
unsigned getNodePriority(const SUnit *SU) const;
unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
return scheduleDAG->DAG->GetOrdering(SU->getNode());
}
@@ -1450,7 +1552,9 @@ public:
bool HighRegPressure(const SUnit *SU) const;
- bool MayReduceRegPressure(SUnit *SU);
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
void ScheduledNode(SUnit *SU);
@@ -1538,6 +1642,20 @@ ILPBURRPriorityQueue;
// Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
@@ -1576,17 +1694,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() {
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
-void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
-}
-
void RegReductionPQBase::addNode(const SUnit *SU) {
unsigned SUSize = SethiUllmanNumbers.size();
if (SUnits->size() > SUSize)
@@ -1625,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// If SU does not have a register def, schedule it close to its uses
// because it does not lengthen any live ranges.
return 0;
+#if 1
return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
}
//===----------------------------------------------------------------------===//
@@ -1670,7 +1787,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
return false;
}
-bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
const SDNode *N = SU->getNode();
if (!N->isMachineOpcode() || !SU->NumSuccs)
@@ -1688,10 +1805,60 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
return false;
}
+// Compute the register pressure contribution by this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so it could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
+
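The sign convention is easy to misread; a worked example with hypothetical counts:

    // Suppose a node has two data operands in an over-limit register class
    // whose defs are not yet live, and one def in that class that has uses.
    int PDiff = 0;
    PDiff += 2; // scheduling the node makes those two operand defs live
    PDiff -= 1; // ...while, bottom-up, its own def dies
    // PDiff == +1: picking this node raises high-pressure liveness by one.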
void RegReductionPQBase::ScheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
+ if (!SU->getNode())
+ return;
+
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl())
@@ -1758,6 +1925,8 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
return;
const SDNode *N = SU->getNode();
+ if (!N) return;
+
if (!N->isMachineOpcode()) {
if (N->getOpcode() != ISD::CopyToReg)
return;
@@ -1871,7 +2040,29 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
-/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
@@ -1893,20 +2084,67 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
return RetVal;
}
-/// UnitsSharePred - Return true if the two scheduling units share a common
-/// data predecessor.
-static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
- SmallSet<const SUnit*, 4> Preds;
- for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- Preds.insert(I->getSUnit());
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ I->getSUnit()->isVRegCycle = 0;
+ }
}
- for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- if (Preds.count(I->getSUnit()))
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
return true;
+ }
}
return false;
}
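Concretely, this targets loops like the one in the comment deleted just below (the ARM-ish sequence is illustrative, taken from that comment):

    // BB:
    //   sub r1, r3, #1     ; IV update -- the VRegCycle def
    //   str r0, [r2, r3]   ; another use of r3
    //   mov r3, r1         ; the copy we want to eliminate
    //   cmp
    //   bne BB
    // In this order "str" still reads r3 after the update, so the update must
    // write a fresh register (r1) and a "mov" is needed. If the other use is
    // ordered before the update, the update kills r3, r1 coalesces with r3,
    // and the "mov" disappears.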
@@ -1926,23 +2164,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
RegReductionPQBase *SPQ) {
- // If the two nodes share an operand and one of them has a single
- // use that is a live out copy, favor the one that is live out. Otherwise
- // it will be difficult to eliminate the copy if the instruction is a
- // loop induction variable update. e.g.
- // BB:
- // sub r1, r3, #1
- // str r0, [r2, r3]
- // mov r3, r1
- // cmp
- // bne BB
- bool SharePred = UnitsSharePred(left, right);
- // FIXME: Only adjust if BB is a loop back edge.
- // FIXME: What's the cost of a copy?
- int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
- int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
- int LHeight = (int)left->getHeight() - LBonus;
- int RHeight = (int)right->getHeight() - RBonus;
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
BUHasStall(left, LHeight, SPQ);
@@ -1953,45 +2180,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall)
+ if (!RStall) {
+ DEBUG(++FactorCount[FactStall]);
return 1;
- if (LHeight != RHeight)
+ }
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactStall]);
return LHeight > RHeight ? 1 : -1;
- } else if (RStall)
+ }
+ } else if (RStall) {
+ DEBUG(++FactorCount[FactStall]);
return -1;
+ }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
right->SchedulingPref == Sched::Latency)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight)
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactHeight]);
return LHeight > RHeight ? 1 : -1;
+ }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
- // it's height is already covered so only its depth matters. We also reach
+ // its height is already covered so only its depth matters. We also reach
// this if both stall but have the same height.
- unsigned LDepth = left->getDepth();
- unsigned RDepth = right->getDepth();
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
+ DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency)
+ if (left->Latency != right->Latency) {
+ DEBUG(++FactorCount[FactOther]);
return left->Latency > right->Latency ? 1 : -1;
+ }
}
return 0;
}
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ DEBUG(++FactorCount[FactRegUses]);
+ #ifndef NDEBUG
+ const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+ // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
- if (LPriority != RPriority)
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority) {
+ DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
+ }
+
+ // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+ // the same, then keep the source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer the node with the lower non-zero order number; zero (meaning
+ // no order) ranks below any non-zero order number.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
@@ -2012,40 +2296,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist)
+ if (LDist != RDist) {
+ DEBUG(++FactorCount[FactOther]);
return LDist < RDist;
+ }
// How many registers become live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch)
+ if (LScratch != RScratch) {
+ DEBUG(++FactorCount[FactOther]);
return LScratch > RScratch;
+ }
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
- if (!DisableSchedCycles) {
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
if (result != 0)
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight())
+ if (left->getHeight() != right->getHeight()) {
+ DEBUG(++FactorCount[FactHeight]);
return left->getHeight() > right->getHeight();
+ }
- if (left->getDepth() != right->getDepth())
+ if (left->getDepth() != right->getDepth()) {
+ DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
+ }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
+ DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -2077,6 +2383,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
if (left->isCall || right->isCall)
// No way to compute latency of calls.
return BURRSort(left, right, SPQ);
@@ -2086,16 +2395,18 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
}
- else if (!LHigh && !RHigh) {
+ if (!LHigh && !RHigh) {
int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
if (result != 0)
return result > 0;
@@ -2112,34 +2423,118 @@ bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
!= ScheduleHazardRecognizer::NoHazard)
return false;
- return SU->getHeight() <= CurCycle;
+ return true;
}
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
if (left->isCall || right->isCall)
// No way to compute latency of calls.
return BURRSort(left, right, SPQ);
- bool LHigh = SPQ->HighRegPressure(left);
- bool RHigh = SPQ->HighRegPressure(right);
- // Avoid causing spills. If register pressure is high, schedule for
- // register pressure reduction.
- if (LHigh && !RHigh)
- return true;
- else if (!LHigh && RHigh)
- return false;
- else if (!LHigh && !RHigh) {
- // Low register pressure situation, schedule to maximize instruction level
- // parallelism.
- if (left->NumPreds > right->NumPreds)
- return false;
- else if (left->NumPreds < right->NumPreds)
- return false;
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ DEBUG(++FactorCount[FactRegUses]);
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
}
return BURRSort(left, right, SPQ);
}
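Condensed, the comparator above tries the following cascade (a sketch, not a drop-in implementation; flag names are the ones defined at the top of this file):

    bool ilpLess(SUnit *left, SUnit *right) { // bodies elided
      if (int res = checkSpecialNodes(left, right)) return res > 0;
      if (left->isCall || right->isCall) return BURRSort(left, right, SPQ);
      // 1. register pressure diff        (-disable-sched-reg-pressure)
      // 2. coalescing tie-break when either side is under pressure
      // 3. live-use count                (-disable-sched-live-uses)
      // 4. pipeline stalls               (-disable-sched-stalls)
      // 5. depth, then height, bounded by -max-sched-reorder
      return BURRSort(left, right, SPQ); // register reduction fallback
    }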
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Preschedule for Register Pressure
//===----------------------------------------------------------------------===//
@@ -2417,6 +2812,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res < 0;
+
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 477c1ff..9f2f012 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -27,12 +27,21 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency' "
+ "instructions take for targets with no itinerary"));
+
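Being a cl::opt, the estimate can be tuned per run, e.g. (illustrative):

    $ llc -pre-RA-sched=list-hybrid -sched-high-latency-cycles=20 test.ll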
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
: ScheduleDAG(mf),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
@@ -72,11 +81,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
@@ -273,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
+ SmallVector<SUnit*, 8> CallSUnits;
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
@@ -325,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
if (!HasGlueUse) break;
}
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
// If there are glue operands involved, N is now the bottom-most node
// of the sequence of nodes that are glued together.
// Update the SUnit.
@@ -338,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
}
void ScheduleDAGSDNodes::AddSchedEdges() {
@@ -403,6 +440,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// If this is a ctrl dep, latency is 1.
unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
@@ -410,11 +451,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
- if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
// a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
--OpSU->NumRegDefsLeft;
}
}
@@ -437,6 +482,10 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Initialize NumNodeDefs for the current Node's opcode.
void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
if (!Node->isMachineOpcode()) {
if (Node->getOpcode() == ISD::CopyFromReg)
NodeNumDefs = 1;
@@ -499,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
SU->Latency = 1;
@@ -506,7 +565,11 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
if (!InstrItins || InstrItins->isEmpty()) {
- SU->Latency = 1;
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
return;
}
@@ -573,7 +636,7 @@ namespace {
};
}
-/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node.
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index cc7310e..b5f68f3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -80,6 +80,12 @@ namespace llvm {
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+ /// CopyToReg and its only active data operands are CopyFromReg within a
+ /// single block loop.
+ ///
+ void InitVRegCycleFlag(SUnit *SU);
+
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void InitNumRegDefsLeft(SUnit *SU);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9120288..c2711c8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1418,9 +1418,9 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
-SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- MVT ShTy = TLI.getShiftAmountTy(OpTy);
+ MVT ShTy = TLI.getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -2482,6 +2482,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
"Vector element count mismatch!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, VT);
break;
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2496,6 +2499,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT,
Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, VT);
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2512,6 +2518,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
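Taken together, the three new folds treat an undef operand consistently with what each extension guarantees about the high bits (a summary, not code from the patch):

    // sext(undef) -> 0     all result bits are copies of one (undef) bit;
    //                      choosing 0 is a legal refinement
    // zext(undef) -> 0     high bits are guaranteed zero; the low (undef)
    //                      bits may be refined to zero as well
    // aext(undef) -> undef no bit of the result is constrained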
@@ -5904,7 +5912,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BITCAST: return "bit_convert";
+ case ISD::BITCAST: return "bitcast";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -6226,6 +6234,9 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
return;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
OS << '\n';
printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
}
@@ -6238,7 +6249,7 @@ void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
// Don't print impossibly deep things.
- printrWithDepth(OS, G, 100);
+ printrWithDepth(OS, G, 10);
}
void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
@@ -6247,7 +6258,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
- dumprWithDepth(G, 100);
+ dumprWithDepth(G, 10);
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
@@ -6311,7 +6322,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
case ISD::ROTL:
case ISD::ROTR:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
- getShiftAmountOperand(Operands[1])));
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG: {
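The extra parameter matters because the legal shift-amount type is a function of the shifted type, not of the amount's current type. A hedged usage sketch (LHS, AmtIn, and dl are assumed to be in scope):

    // Truncate or zero-extend the amount to whatever the target wants for
    // shifts of LHS's type, then build the shift.
    SDValue Amt = DAG.getShiftAmountOperand(LHS.getValueType(), AmtIn);
    SDValue Shl = DAG.getNode(ISD::SHL, dl, LHS.getValueType(), LHS, Amt);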
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 48d9bbb..b02a7b6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -50,7 +50,6 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision",
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
-static cl::opt<unsigned>
-MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
- cl::init(64), cl::Hidden);
+static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
@@ -1130,15 +1127,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
else if (F->paramHasAttr(0, Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
- // FIXME: C calling convention requires the return type to be promoted
- // to at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
@@ -1153,9 +1143,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Flags.setInReg();
// Propagate extension type if any
- if (F->paramHasAttr(0, Attribute::SExt))
+ if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
- else if (F->paramHasAttr(0, Attribute::ZExt))
+ else if (ExtendKind == ISD::ZERO_EXTEND)
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
@@ -2029,9 +2019,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
- double LDensity = (double)LSize.roundToDouble() /
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. hosts that use 80-bit x87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
(LEnd - First + 1ULL).roundToDouble();
- double RDensity = (double)RSize.roundToDouble() /
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
(Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
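A standalone illustration of the hazard (hypothetical values; only observable on hosts that evaluate double in 80-bit x87 registers):

    volatile double LDensity = 3.0 / 7.0; // the store rounds to 64 bits
    volatile double RDensity = 3.0 / 7.0;
    // Without volatile, one quotient may stay in an 80-bit register while the
    // other is spilled and rounded, so comparisons and the Metric below could
    // differ from host to host; forcing both through memory is deterministic.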
@@ -4039,10 +4033,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
if (DV.isInlinedFnArgument(MF.getFunction()))
return false;
- MachineBasicBlock *MBB = FuncInfo.MBB;
- if (MBB != &MF.front())
- return false;
-
unsigned Reg = 0;
if (Arg->hasByValAttr()) {
// Byval arguments' frame index is recorded during argument lowering.
@@ -4413,7 +4403,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_dispatch_setup: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
+ getRoot()));
return 0;
}
@@ -4682,9 +4672,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
- case Intrinsic::trap:
- DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ case Intrinsic::trap: {
+ StringRef TrapFuncName = getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, getCurDebugLoc());
+ DAG.setRoot(Result.second);
return 0;
+ }
case Intrinsic::uadd_with_overflow:
return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
@@ -4937,15 +4940,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
DAG.getVTList(&RetTys[0], RetTys.size()),
&ReturnValues[0], ReturnValues.size()));
-
}
- // As a special case, a null chain means that a tail call has been emitted and
- // the DAG root is already updated.
- if (Result.second.getNode())
- DAG.setRoot(Result.second);
- else
+ // Assign order to nodes here. If the call does not produce a result, it won't
+ // be mapped to an SDNode and visit() will not assign it an order number.
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
HasTailCall = true;
+ ++SDNodeOrder;
+ AssignOrderingToNode(DAG.getRoot().getNode());
+ } else {
+ DAG.setRoot(Result.second);
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.second.getNode());
+ }
if (LandingPad) {
// Insert a label at the end of the invoke call to mark the try range. This
@@ -5211,12 +5220,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LowerCallTo(&I, Callee, I.isTailCall());
}
-namespace llvm {
+namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
-class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
- public TargetLowering::AsmOperandInfo {
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber
/// this is null, otherwise it is the incoming operand to the CallInst.
@@ -5304,7 +5312,7 @@ private:
typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
-} // end llvm namespace.
+} // end anonymous namespace
/// isAllocatableRegister - If the specified register is safe to allocate,
/// i.e. it isn't a stack pointer or some other special register, return the
@@ -5363,11 +5371,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
/// OpInfo describes the operand.
/// Input and OutputRegs are the set of already allocated physical registers.
///
-void SelectionDAGBuilder::
-GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
- LLVMContext &Context = FuncInfo.Fn->getContext();
+static void GetRegistersForValue(SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ DebugLoc DL,
+ SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ LLVMContext &Context = *DAG.getContext();
// Compute whether this value requires an input register, an output register,
// or both.
@@ -5413,7 +5423,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// vector types).
EVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5423,7 +5433,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// machine.
RegVT = EVT::getIntegerVT(Context,
OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
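
The two BITCAST nodes above implement a size-preserving coercion: an operand whose width already matches the register type is reinterpreted in place, and a floating-point operand headed for an integer register class is first recast as an integer of the operand's own width. A hedged sketch of that width arithmetic, with a toy value type standing in for EVT (ToyVT and coerceForRegister are illustrative names, not LLVM API):

#include <cassert>
#include <cstdio>

// Toy stand-in for EVT: a kind plus a bit width.
struct ToyVT {
  enum Kind { Integer, Float } K;
  unsigned Bits;
};

// Type the call operand should be bitcast to so it fits a register of type
// RegVT, following the same two rules as the hunk above.
static ToyVT coerceForRegister(ToyVT RegVT, ToyVT OpVT) {
  if (RegVT.Bits == OpVT.Bits)
    return RegVT;                            // same width: plain bitcast
  if (RegVT.K == ToyVT::Integer && OpVT.K == ToyVT::Float)
    return ToyVT{ToyVT::Integer, OpVT.Bits}; // f64 -> i64, f32 -> i32, ...
  return OpVT;                               // otherwise leave it alone
}

int main() {
  ToyVT R = coerceForRegister({ToyVT::Integer, 64}, {ToyVT::Float, 64});
  assert(R.K == ToyVT::Integer && R.Bits == 64);
  std::printf("coerced to i%u\n", R.Bits);
}
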
@@ -5694,7 +5704,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5705,7 +5716,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6181,7 +6193,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case. In
- // that case, nothing will actualy look at the value.
+ // that case, nothing will actually look at the value.
if (ReturnValues.empty())
return std::make_pair(SDValue(), Chain);
@@ -6397,7 +6409,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->setValue(I, Res);
// If this argument is live outside of the entry block, insert a copy from
- // whereever we got it to the vreg that other BB's will reference it as.
+ // wherever we got it to the vreg that other BB's will reference it as.
SDB->CopyToExportRegsIfNeeded(I);
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8f466d9..a689b76 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -23,7 +23,6 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include <vector>
-#include <set>
namespace llvm {
@@ -60,7 +59,6 @@ class MDNode;
class PHINode;
class PtrToIntInst;
class ReturnInst;
-class SDISelAsmOperandInfo;
class SDDbgValue;
class SExtInst;
class SelectInst;
@@ -380,10 +378,6 @@ public:
assert(N.getNode() == 0 && "Already set a value for this node!");
N = NewN;
}
-
- void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs);
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 68ba966..fdf3767 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -421,10 +421,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
-void
-SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End,
- bool &HadTailCall) {
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
// Lower all of the non-terminator instructions. If a call is emitted
// as a tail call, cease emitting nodes for this block. Terminators
// are handled below.
@@ -438,7 +437,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
- return;
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -489,13 +487,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
if (TimePassesIsEnabled)
GroupName = "Instruction Selection and Scheduling";
std::string BlockName;
+ int BlockNumber = -1;
+#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
BlockName = MF->getFunction()->getNameStr() + ":" +
FuncInfo->MBB->getBasicBlock()->getNameStr();
-
- DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump());
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -505,7 +509,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(Unrestricted, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -518,7 +523,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (Changed) {
if (ViewDAGCombineLT)
@@ -531,8 +537,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n";
- CurDAG->dump());
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
}
{
@@ -556,8 +562,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n";
- CurDAG->dump());
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
}
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
@@ -567,7 +573,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize(OptLevel);
}
- DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -577,7 +584,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -591,7 +599,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -632,7 +641,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(errs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
@@ -735,16 +746,49 @@ void SelectionDAGISel::PrepareEHLandingPad() {
-
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
// Don't try to fold volatile loads. Target has to deal with alignment
// constraints.
if (LI->isVolatile()) return false;
- // Figure out which vreg this is going into.
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
unsigned LoadReg = FastIS->getRegForValue(LI);
- assert(LoadReg && "Load isn't already assigned a vreg? ");
+ if (LoadReg == 0)
+ return false;
// Check to see what the uses of this vreg are. If it has no uses, or more
// than one use (at the machine instr level) then we can't fold it.
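
The scan in TryToFoldFastISelLoad relies on a simple invariant: from a single-use load, repeatedly following the single user walks a straight-line chain of folded instructions, so reaching FoldInst within a few hops proves the load feeds it. A self-contained sketch of the same bounded walk over a toy def-use chain (ToyInst, its User pointer, and the OneUse flag are stand-ins for the IR use lists, not LLVM API):

#include <cassert>

// Toy instruction: at most one interesting user, as in a single-use chain.
struct ToyInst {
  ToyInst *User;  // the single user, or null
  bool OneUse;    // does this instruction have exactly one use?
  int Block;      // basic block id, to mirror the same-block check
};

// Mirrors the loop above: hop from the load toward Fold, giving up on
// multiple uses, a block change, or after a fixed number of hops.
static bool reachesFoldInst(const ToyInst *Load, const ToyInst *Fold) {
  unsigned MaxUsers = 6;  // don't scan huge chains
  const ToyInst *Cur = Load->User;
  while (Cur != Fold && Cur && Cur->Block == Fold->Block && --MaxUsers) {
    if (!Cur->OneUse)
      return false;       // chain fans out: can't fold
    Cur = Cur->User;
  }
  return Cur == Fold;
}

int main() {
  // load -> icmp -> br, all in block 0 (the A/B/C example in the comment).
  ToyInst Br{nullptr, true, 0};
  ToyInst Icmp{&Br, true, 0};
  ToyInst Load{&Icmp, true, 0};
  assert(reachesFoldInst(&Load, &Br));
}
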
@@ -764,7 +808,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
"The only use of the vreg must be a use, we haven't emitted the def!");
MachineInstr *User = &*RI;
-
+
// Set the insertion point properly. Folding the load can cause generation of
// other random instructions (like sign extends) for addressing modes, make
// sure they get inserted in a logical place before the new instruction.
@@ -817,6 +861,17 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) {
}
#endif
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
@@ -843,15 +898,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (AllPredsVisited) {
- for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
- I != E && isa<PHINode>(I); ++I) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
- }
} else {
- for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
- I != E && isa<PHINode>(I); ++I) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
- }
}
FuncInfo->VisitedBBs.insert(LLVMBB);
@@ -899,10 +952,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *Inst = llvm::prior(BI);
// If we no longer require this instruction, skip it.
- if (!Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst) &&
- !isa<DbgInfoIntrinsic>(Inst) &&
- !FuncInfo->isExportedInst(Inst))
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo))
continue;
// Bottom-up: reset the insert pos at the top, after any local-value
@@ -911,16 +961,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
- // If fast isel succeeded, check to see if there is a single-use
- // non-volatile load right before the selected instruction, and see if
- // the load is used by the instruction. If so, try to fold it.
- const Instruction *BeforeInst = 0;
- if (Inst != Begin)
- BeforeInst = llvm::prior(llvm::prior(BI));
- if (BeforeInst && isa<LoadInst>(BeforeInst) &&
- BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
- --BI; // If we succeeded, don't re-select the load.
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
continue;
}
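
The rewritten loop generalizes the old "look exactly one instruction back" check: it walks backwards past anything isFoldedOrDeadInstruction says will never be emitted, and only then asks whether a load is sitting there. A sketch of that backward scan over a plain array of toy instructions (the foldedOrDead classifier below is a deliberate simplification of the real predicate, which also consults FunctionLoweringInfo):

#include <cassert>
#include <vector>

enum class Kind { Load, Cmp, Dbg, Store };

// Simplified stand-in for isFoldedOrDeadInstruction: comparisons feeding a
// fold and debug intrinsics don't need to be emitted on their own.
static bool foldedOrDead(Kind K) { return K == Kind::Dbg || K == Kind::Cmp; }

// Given the index of the just-selected instruction, scan backwards over
// folded/dead instructions and report the index of a load candidate, or -1.
static int findFoldableLoad(const std::vector<Kind> &BB, int Sel) {
  int I = Sel;
  while (I > 0) {
    --I;
    if (!foldedOrDead(BB[I]))
      break;              // hit something that must be emitted
  }
  return (I != Sel && BB[I] == Kind::Load) ? I : -1;
}

int main() {
  // load, cmp (folded), dbg (dead), then the selected branch at index 3.
  std::vector<Kind> BB{Kind::Load, Kind::Cmp, Kind::Dbg, Kind::Store};
  assert(findFoldableLoad(BB, 3) == 0);
}
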
@@ -974,11 +1028,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
else
++NumFastIselBlocks;
- // Run SelectionDAG instruction selection on the remainder of the block
- // not handled by FastISel. If FastISel is not run, this is the entire
- // block.
- bool HadTailCall;
- SelectBasicBlock(Begin, BI, HadTailCall);
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
@@ -2392,6 +2448,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
CurDAG->getRegister(RegNo, VT), (SDNode*)0));
continue;
}
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
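
The new opcode exists purely because a single byte caps the matcher table at 256 register enum values; the escape stores the register number in two consecutive bytes, low byte first, exactly like extended opcodes. A standalone sketch of that little-endian decode (readRegNo2 is an illustrative name):

#include <cassert>
#include <cstdint>

// Reads a register number that the table emitter wrote as two bytes,
// low byte first, matching the OPC_EmitRegister2 case above.
static unsigned readRegNo2(const uint8_t *Table, unsigned &Idx) {
  unsigned RegNo = Table[Idx++];
  RegNo |= unsigned(Table[Idx++]) << 8;
  return RegNo;
}

int main() {
  // Register enum value 0x1234 serialized as {0x34, 0x12}.
  const uint8_t Table[] = {0x34, 0x12};
  unsigned Idx = 0;
  assert(readRegNo2(Table, Idx) == 0x1234 && Idx == 2);
}
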
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 76eb945..cd1647b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -90,7 +90,8 @@ namespace llvm {
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
template<typename EdgeIter>
- static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
SDValue Op = EI.getNode()->getOperand(EI.getOperand());
EVT VT = Op.getValueType();
if (VT == MVT::Glue)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 35b847c..15606af 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -93,6 +93,19 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::UREM_I32] = "__umodsi3";
Names[RTLIB::UREM_I64] = "__umoddi3";
Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
Names[RTLIB::NEG_I32] = "__negsi2";
Names[RTLIB::NEG_I64] = "__negdi2";
Names[RTLIB::ADD_F32] = "__addsf3";
@@ -1665,6 +1678,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
if (!ShAmt)
break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
@@ -1678,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
NewTrunc,
- In.getOperand(1)));
+ Shift));
}
break;
}
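
What makes this narrowing sound is a bit-level identity: trunc(srl x, c) and srl(trunc x, c) agree on every result bit below position B - c, and SimplifyDemandedBits only performs the swap when no higher bit is demanded (the HighBits test above); the hunk merely re-materializes the shift amount in the target's legal shift-amount type first. A brute-force check of the identity for W = 32 narrowed to B = 16:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned C = 0; C < 16; ++C) {
    uint16_t Mask = uint16_t((1u << (16 - C)) - 1); // demanded low bits
    for (uint32_t X = 0; X < 1u << 18; X += 7) {    // sampled values
      uint16_t A = uint16_t(X >> C);                // trunc(srl x, c)
      uint16_t B = uint16_t(uint16_t(X) >> C);      // srl(trunc x, c)
      assert((A & Mask) == (B & Mask));
    }
  }
}
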
@@ -1829,7 +1849,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI, DebugLoc dl) const {
SelectionDAG &DAG = DCI.DAG;
- LLVMContext &Context = *DAG.getContext();
// These setcc operations always fold.
switch (Cond) {
@@ -1840,12 +1859,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
case ISD::SETTRUE2: return DAG.getConstant(1, VT);
}
- if (isa<ConstantSDNode>(N0.getNode())) {
- // Ensure that the constant occurs on the RHS, and fold constant
- // comparisons.
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
- }
-
+
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -1898,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
}
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
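
The safety condition is the heart of this fold: the comparison can be narrowed to MinBits only if the constant survives truncation, i.e. all of its set bits fit below MinBits (MinBits > C1.getActiveBits()) while the narrowing is still real (MinBits < C1.getBitWidth()). A self-contained check of that reasoning using 64-bit arithmetic in place of APInt (activeBits, maskBits, and canNarrowZExtCmp are illustrative helpers):

#include <cassert>
#include <cstdint>

// Number of significant bits in C: index of the highest set bit, plus one.
static unsigned activeBits(uint64_t C) {
  unsigned N = 0;
  while (C) { ++N; C >>= 1; }
  return N;
}

// An AND with mask 2^k - 1 behaves like a zext from k bits (DAGCombine turns
// costly zexts into exactly such ANDs); returns k, or 0 if not of that form.
static unsigned maskBits(uint64_t Mask) {
  return ((Mask + 1) & Mask) == 0 ? activeBits(Mask) : 0;
}

// Can "(zext iMin x to iWide) == C" become "x == trunc(C)"? Mirrors the
// guard above: narrowing must be real, and truncation must drop only zeros.
static bool canNarrowZExtCmp(unsigned MinBits, unsigned WideBits, uint64_t C) {
  return MinBits < WideBits && MinBits > activeBits(C);
}

int main() {
  assert(maskBits(0xFF) == 8);  // and x, 255 acts like a zext from i8
  // (zext i8 x to i32) == 42: 42 needs 6 bits, so compare in i8 instead.
  assert(canNarrowZExtCmp(8, 32, 42));
  // (zext i8 x to i32) == 300: 300 can never equal an 8-bit zext result;
  // the fold as written simply bails rather than proving the compare false.
  assert(!canNarrowZExtCmp(8, 32, 300));
}
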
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
@@ -1936,7 +1990,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
if (bestWidth) {
- EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound()) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
@@ -3174,26 +3228,39 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
- ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
- APInt::mu magics = N1C->getAPIntValue().magicu();
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the divided value upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
- SDValue Q;
if (isOperationLegalOrCustom(ISD::MULHU, VT))
- Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
- DAG.getConstant(magics.m, VT));
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
- N->getOperand(0),
- DAG.getConstant(magics.m, VT)).getNode(), 1);
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
else
return SDValue(); // No mulhu or equivalent
if (Created)
Created->push_back(Q.getNode());
if (magics.a == 0) {
- assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ assert(magics.s < N1C.getBitWidth() &&
"We shouldn't generate an undefined shift!");
return DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
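
The new early-shift in BuildUDIV exploits a plain identity: for unsigned x and an even divisor d = dOdd << k, x / d == (x >> k) / dOdd, and the magic number computed for the pre-shifted divisor then needs no add-based fixup (the assert on magics.a). A brute-force check of the identity itself, independent of the magic-number machinery:

#include <cassert>
#include <cstdint>

// Dividing by d = dOdd << k equals shifting the dividend right by k and
// dividing by dOdd; this is what lets BuildUDIV pre-shift Q and request a
// magic number for the smaller divisor.
static uint32_t divByEven(uint32_t X, uint32_t DOdd, unsigned K) {
  return (X >> K) / DOdd;
}

int main() {
  const uint32_t DOdd = 7;
  const unsigned K = 2;  // divisor d = 7 << 2 = 28
  for (uint32_t X = 0; X < 1u << 20; ++X)
    assert(divByEven(X, DOdd, K) == X / (DOdd << K));
}
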