author     rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
committer  rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
commit     1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree       9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/CodeGen/SelectionDAG
parent     377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
Update LLVM to r104832.
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp             144
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp                262
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp             98
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h               10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp              39
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp     18
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h               2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp           1
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp       147
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp       71
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h         22
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp             53
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp      39
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp         89
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp            7
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp    4
16 files changed, 785 insertions(+), 221 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3639f80..6bddd78 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -760,12 +760,18 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
- SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
- if (NN1.getNode() == 0)
- return SDValue();
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+ }
AddToWorkList(NN0.getNode());
- AddToWorkList(NN1.getNode());
+ if (NN1.getNode())
+ AddToWorkList(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
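The guard above matters when both operands are the same node, e.g. (add x, x). Compressed restatement of the hunk's logic, using its own identifiers (a sketch, not a complete function):

    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    // Promote a shared operand only once; a second PromoteOperand call on
    // the same node would queue two replacements for a single load.
    SDValue NN1 = (N0 == N1) ? NN0 : PromoteOperand(N1, PVT, Replace1);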
@@ -3425,8 +3431,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
- if (NarrowLoad.getNode() != N0.getNode())
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
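The same capture-then-combine pattern recurs in the zext and aext hunks below. The ordering is the point: the operand must be saved before CombineTo runs, since CombineTo may delete the truncate that reaches it (identifiers from the hunk above):

    SDNode *oye = N0.getNode()->getOperand(0).getNode(); // save first
    CombineTo(N0.getNode(), NarrowLoad);   // may delete the truncate
    AddToWorkList(oye);                    // revisit the now-exposed node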
@@ -3564,7 +3574,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -3585,9 +3595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
- }
-
-
+ }
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3615,8 +3623,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
- if (NarrowLoad.getNode() != N0.getNode())
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
}
}
@@ -3726,8 +3738,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
}
- // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the zext'd result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ } else {
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
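Per lane, a vector setcc yields an all-ones mask for true, so zero-extending it to 0/1 is an AND with a splat of 1. A standalone scalar model of one lane (plain C++, illustrative only):

    #include <cstdint>

    int32_t lane_zext_of_setcc(int32_t a, int32_t b) {
      int32_t mask = (a < b) ? -1 : 0; // what vsetcc produces in each lane
      return mask & 1;                 // zext(setcc): the lane becomes 0 or 1
    }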
@@ -3780,8 +3832,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
- if (NarrowLoad.getNode() != N0.getNode())
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
}
}
@@ -3883,8 +3939,39 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
- // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
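For any_extend the high bits are don't-care, so the -1/0 vsetcc lanes are usable directly in the same-size case, and can be sign-extended or truncated lane-wise when the widths differ. A scalar model of one widening lane, again illustrative:

    #include <cstdint>

    int64_t lane_aext_of_setcc(int32_t a, int32_t b) {
      int32_t mask = (a < b) ? -1 : 0; // narrow vsetcc lane
      return (int64_t)mask;            // sign-extending to the wide lane
    }                                  // yields a valid any-extended value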
@@ -5278,10 +5365,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
- std::swap(BasePtr, Offset);
- if (Ptr != BasePtr)
- continue;
// Don't create an indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
@@ -5953,6 +6036,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
// If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
// vector with the inserted element.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6206,7 +6293,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// all scalar elements the same.
if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
SDNode *V = N0.getNode();
-
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
@@ -6338,13 +6424,21 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
break;
}
- Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
- EltType, LHSOp, RHSOp));
- AddToWorkList(Ops.back().getNode());
- assert((Ops.back().getOpcode() == ISD::UNDEF ||
- Ops.back().getOpcode() == ISD::Constant ||
- Ops.back().getOpcode() == ISD::ConstantFP) &&
- "Scalar binop didn't fold!");
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (LHSOp.getValueType() != EltType)
+ LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
+ if (RHSOp.getValueType() != EltType)
+ RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
+
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+ LHSOp, RHSOp);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::Constant &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
}
if (Ops.size() == LHS.getNumOperands()) {
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b4c3833..95f4d07 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -56,6 +56,27 @@
#include "FunctionLoweringInfo.h"
using namespace llvm;
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(I->use_begin())->getParent() == I->getParent();
+}
+
unsigned FastISel::getRegForValue(const Value *V) {
EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
@@ -78,12 +99,24 @@ unsigned FastISel::getRegForValue(const Value *V) {
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- if (ValueMap.count(V))
- return ValueMap[V];
+ DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V);
+ if (I != ValueMap.end())
+ return I->second;
unsigned Reg = LocalValueMap[V];
if (Reg != 0)
return Reg;
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (IsBottomUp) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ if (isa<Instruction>(V))
+ ValueMap[V] = Reg;
+ else
+ LocalValueMap[V] = Reg;
+ return Reg;
+ }
+
return materializeRegForValue(V, VT);
}
@@ -123,7 +156,8 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
unsigned IntegerReg =
getRegForValue(ConstantInt::get(V->getContext(), IntVal));
if (IntegerReg != 0)
- Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
}
}
} else if (const Operator *Op = dyn_cast<Operator>(V)) {
@@ -174,25 +208,33 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
else if (Reg != AssignedReg) {
const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
- Reg, RegClass, RegClass);
+ Reg, RegClass, RegClass, DL);
}
return AssignedReg;
}
-unsigned FastISel::getRegForGEPIndex(const Value *Idx) {
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
unsigned IdxN = getRegForValue(Idx);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
- return 0;
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend it.
MVT PtrVT = TLI.getPointerTy();
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
- if (IdxVT.bitsLT(PtrVT))
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
- else if (IdxVT.bitsGT(PtrVT))
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
- return IdxN;
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -224,10 +266,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
// Check if the second operand is a constant and handle it appropriately.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, CI->getZExtValue());
+ ISDOpcode, Op0, Op0IsKill,
+ CI->getZExtValue());
if (ResultReg != 0) {
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, ResultReg);
@@ -238,7 +283,7 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Check if the second operand is a constant float.
if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, CF);
+ ISDOpcode, Op0, Op0IsKill, CF);
if (ResultReg != 0) {
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, ResultReg);
@@ -251,9 +296,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
// Now we have both operands in registers. Emit the instruction.
unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op1);
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
if (ResultReg == 0)
// Target-specific code wasn't able to find a machine opcode for
// the given ISD opcode and type. Halt "fast" selection and bail.
@@ -270,6 +319,8 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
const Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -282,10 +333,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
// FIXME: This can be optimized by combining the add with a
// subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ NIsKill = true;
}
Ty = StTy->getElementType(Field);
} else {
@@ -296,27 +348,31 @@ bool FastISel::SelectGetElementPtr(const User *I) {
if (CI->getZExtValue() == 0) continue;
uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ NIsKill = true;
continue;
}
// N = N + Idx * ElementSize;
uint64_t ElementSize = TD.getTypeAllocSize(Ty);
- unsigned IdxN = getRegForGEPIndex(Idx);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
if (ElementSize != 1) {
- IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ IdxNIsKill = true;
}
- N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
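The loop above lowers a GEP to running address arithmetic: struct fields add a constant byte offset, array indices add index times element size. The math the FastEmit calls compute, reduced to plain integers (hypothetical helper, not from the tree):

    #include <cstdint>

    // Models N = FastEmit_ri_(ADD, N, Offs), IdxN = FastEmit_ri_(MUL, ...),
    // and N = FastEmit_rr(ADD, N, IdxN) from the hunk above.
    uint64_t gepAddress(uint64_t base, uint64_t fieldOffs,
                        int64_t idx, uint64_t eltSize) {
      uint64_t addr = base + fieldOffs;  // constant struct-field offset
      if (eltSize != 1)
        idx *= (int64_t)eltSize;         // scale the array index
      return addr + (uint64_t)idx;       // accumulate into the pointer
    }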
@@ -338,7 +394,7 @@ bool FastISel::SelectCall(const User *I) {
default: break;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
- if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
+ if (!DIVariable(DI->getVariable()).Verify() ||
!MF.getMMI().hasDebugInfo())
return true;
@@ -402,7 +458,7 @@ bool FastISel::SelectCall(const User *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Reg, RC, RC);
+ Reg, RC, RC, DL);
assert(InsertedCopy && "Can't copy address registers!");
InsertedCopy = InsertedCopy;
UpdateValueMap(I, ResultReg);
@@ -432,17 +488,19 @@ bool FastISel::SelectCall(const User *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
- RC, RC);
+ RC, RC, DL);
assert(InsertedCopy && "Can't copy address registers!");
InsertedCopy = InsertedCopy;
+ bool ResultRegIsKill = hasTrivialKill(I);
+
// Cast the register to the type of the selector.
if (SrcVT.bitsGT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg);
+ ResultReg, ResultRegIsKill);
else if (SrcVT.bitsLT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg);
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
if (ResultReg == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -490,12 +548,15 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
// If the operand is i1, arrange for the high bits in the register to be zero.
if (SrcVT == MVT::i1) {
SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
- InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+ InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
if (!InputReg)
return false;
+ InputRegIsKill = true;
}
// If the result is i1, truncate to the target's type for i1 first.
if (DstVT == MVT::i1)
@@ -504,7 +565,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
DstVT.getSimpleVT(),
Opcode,
- InputReg);
+ InputReg, InputRegIsKill);
if (!ResultReg)
return false;
@@ -536,6 +597,8 @@ bool FastISel::SelectBitCast(const User *I) {
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
@@ -545,7 +608,7 @@ bool FastISel::SelectBitCast(const User *I) {
ResultReg = createResultReg(DstClass);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Op0, DstClass, SrcClass);
+ Op0, DstClass, SrcClass, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -553,7 +616,7 @@ bool FastISel::SelectBitCast(const User *I) {
// If the reg-reg copy failed, select a BIT_CONVERT opcode.
if (!ResultReg)
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BIT_CONVERT, Op0);
+ ISD::BIT_CONVERT, Op0, Op0IsKill);
if (!ResultReg)
return false;
@@ -609,10 +672,12 @@ FastISel::SelectFNeg(const User *I) {
unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
if (OpReg == 0) return false;
+ bool OpRegIsKill = hasTrivialKill(I);
+
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(I->getType());
unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
- ISD::FNEG, OpReg);
+ ISD::FNEG, OpReg, OpRegIsKill);
if (ResultReg != 0) {
UpdateValueMap(I, ResultReg);
return true;
@@ -626,18 +691,19 @@ FastISel::SelectFNeg(const User *I) {
return false;
unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::BIT_CONVERT, OpReg);
+ ISD::BIT_CONVERT, OpReg, OpRegIsKill);
if (IntReg == 0)
return false;
- unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg,
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
UINT64_C(1) << (VT.getSizeInBits()-1),
IntVT.getSimpleVT());
if (IntResultReg == 0)
return false;
ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BIT_CONVERT, IntResultReg);
+ ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
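When the target lacks ISD::FNEG, the fallback above negates in the integer domain: bitcast, XOR against the sign bit, bitcast back. The same trick as a standalone function (a sketch, assuming IEEE-754 binary32):

    #include <cstdint>
    #include <cstring>

    float fnegViaXor(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits); // ISD::BIT_CONVERT to integer
      bits ^= UINT32_C(1) << 31;           // ISD::XOR with the sign-bit mask
      std::memcpy(&f, &bits, sizeof f);    // ISD::BIT_CONVERT back to FP
      return f;
    }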
@@ -782,7 +848,8 @@ FastISel::FastISel(MachineFunction &mf,
TM(MF.getTarget()),
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()) {
+ TLI(*TM.getTargetLowering()),
+ IsBottomUp(false) {
}
FastISel::~FastISel() {}
@@ -793,13 +860,15 @@ unsigned FastISel::FastEmit_(MVT, MVT,
}
unsigned FastISel::FastEmit_r(MVT, MVT,
- unsigned, unsigned /*Op0*/) {
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
return 0;
}
unsigned FastISel::FastEmit_rr(MVT, MVT,
- unsigned, unsigned /*Op0*/,
- unsigned /*Op0*/) {
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
return 0;
}
@@ -813,20 +882,23 @@ unsigned FastISel::FastEmit_f(MVT, MVT,
}
unsigned FastISel::FastEmit_ri(MVT, MVT,
- unsigned, unsigned /*Op0*/,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
uint64_t /*Imm*/) {
return 0;
}
unsigned FastISel::FastEmit_rf(MVT, MVT,
- unsigned, unsigned /*Op0*/,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
const ConstantFP * /*FPImm*/) {
return 0;
}
unsigned FastISel::FastEmit_rri(MVT, MVT,
unsigned,
- unsigned /*Op0*/, unsigned /*Op1*/,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
uint64_t /*Imm*/) {
return 0;
}
@@ -836,16 +908,18 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
/// If that fails, it materializes the immediate into a register and try
/// FastEmit_rr instead.
unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
- unsigned Op0, uint64_t Imm,
- MVT ImmType) {
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the ri form.
- unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
if (ResultReg != 0)
return ResultReg;
unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
if (MaterialReg == 0)
return 0;
- return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
}
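Note the /*Kill=*/true on the materialized register: FastEmit_i creates it for this single use, so it is always safe to mark killed. The wrapper's control flow, compressed (names from the patch):

    unsigned R = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
    if (R) return R;                   // the ri form was legal; done
    unsigned M = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
    if (!M) return 0;                  // could not materialize the immediate
    return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, M, /*Kill=*/true);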
/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
@@ -853,10 +927,10 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
/// FastEmit_rf. If that fails, it materializes the immediate into a register
/// and try FastEmit_rr instead.
unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
- unsigned Op0, const ConstantFP *FPImm,
- MVT ImmType) {
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm, MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the rf form.
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+ unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
if (ResultReg != 0)
return ResultReg;
@@ -886,11 +960,13 @@ unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
if (IntegerReg == 0)
return 0;
MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
- ISD::SINT_TO_FP, IntegerReg);
+ ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
if (MaterialReg == 0)
return 0;
}
- return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
}
unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
@@ -908,16 +984,16 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0) {
+ unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II).addReg(Op0);
+ BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
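The Op0IsKill * RegState::Kill idiom used throughout these emitters is a branch-free conditional flag: RegState::Kill is a single power-of-two bit, so multiplying by a bool yields either the flag or 0. Minimal model (the enum value here is illustrative):

    enum { Kill = 1 << 3 };       // stand-in for RegState::Kill, a single bit
    unsigned regKillFlag(bool isKill) {
      return isKill * Kill;       // Kill when true, 0 when false
    }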
@@ -927,16 +1003,21 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, unsigned Op1) {
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -945,16 +1026,21 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, uint64_t Imm) {
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -963,16 +1049,21 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, const ConstantFP *FPImm) {
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -981,16 +1072,24 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, unsigned Op1, uint64_t Imm) {
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -1008,7 +1107,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
else {
BuildMI(MBB, DL, II).addImm(Imm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -1016,18 +1115,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
}
unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, uint32_t Idx) {
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
const TargetRegisterClass* RC = MRI.getRegClass(Op0);
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Idx);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Idx);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -1036,8 +1140,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
/// with all but the least significant bit set to zero.
-unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
- return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
}
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
@@ -1070,6 +1174,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// emitted yet.
for (BasicBlock::const_iterator I = SuccBB->begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
// Ignore dead phi's.
if (PN->use_empty()) continue;
@@ -1092,12 +1197,19 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
unsigned Reg = getRegForValue(PHIOp);
if (Reg == 0) {
PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
}
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5dae82..16eb8a7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -143,7 +143,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// Create the reg, emit the copy.
VRBase = MRI->createVirtualRegister(DstRC);
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
- DstRC, SrcRC);
+ DstRC, SrcRC, Node->getDebugLoc());
assert(Emitted && "Unable to issue a copy instruction!\n");
(void) Emitted;
@@ -265,7 +265,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug) {
+ bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
"Chain and flag operands should occur at end of operand list!");
@@ -289,7 +289,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC);
+ DstRC, SrcRC, Op.getNode()->getDebugLoc());
assert(Emitted && "Unable to issue a copy instruction!\n");
(void) Emitted;
VReg = NewVReg;
@@ -297,15 +297,25 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
}
// If this value has only one use, that use is a kill. This is a
- // conservative approximation. Tied operands are never killed, so we need
- // to check that. And that means we need to determine the index of the
- // operand.
- unsigned Idx = MI->getNumOperands();
- while (Idx > 0 &&
- MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
- --Idx;
- bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
- bool isKill = Op.hasOneUse() && !isTied && !IsDebug;
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on Schedule cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MI->getNumOperands();
+ while (Idx > 0 &&
+ MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
false/*isImp*/, isKill,
@@ -322,9 +332,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug) {
+ bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -373,7 +384,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
"Chain and flag operands should occur at end of operand list!");
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
}
}
@@ -395,7 +407,8 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC,
/// EmitSubregNode - Generate machine code for subreg nodes.
///
void InstrEmitter::EmitSubregNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap){
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
unsigned VRBase = 0;
unsigned Opc = Node->getMachineOpcode();
@@ -439,7 +452,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Add def, source, and subreg index
MI->addOperand(MachineOperand::CreateReg(VRBase, true));
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
MI->addOperand(MachineOperand::CreateImm(SubIdx));
MBB->insert(InsertPos, MI);
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
@@ -473,9 +487,11 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
} else
- AddOperand(MI, N0, 0, 0, VRBaseMap);
+ AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
// Add the subregister being inserted
- AddOperand(MI, N1, 0, 0, VRBaseMap);
+ AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
MI->addOperand(MachineOperand::CreateImm(SubIdx));
MBB->insert(InsertPos, MI);
} else
@@ -503,7 +519,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
// Create the new VReg in the destination class and emit a copy.
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC);
+ DstRC, SrcRC, Node->getDebugLoc());
assert(Emitted &&
"Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
(void) Emitted;
@@ -517,7 +533,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
void InstrEmitter::EmitRegSequence(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
unsigned NewVReg = MRI->createVirtualRegister(RC);
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -528,17 +545,21 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
-#ifndef NDEBUG
if (i & 1) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
- getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
- assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE");
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (!SRC)
+ llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
+ if (SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
-#endif
- AddOperand(MI, Op, i+1, &II, VRBaseMap);
+ AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
}
MBB->insert(InsertPos, MI);
@@ -579,11 +600,17 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addReg(0U); // undef
else
AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
- true /*IsDebug*/);
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- MIB.addImm(CI->getSExtValue());
+ // FIXME: SDDbgValues aren't updated with legalization, so it's possible
+ // to have i128 values in them at this point. As a crude workaround, just
+ // drop the debug info if this happens.
+ if (!CI->getValue().isSignedIntN(64))
+ MIB.addReg(0U);
+ else
+ MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
MIB.addFPImm(CF);
} else {
@@ -612,7 +639,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
- EmitSubregNode(Node, VRBaseMap);
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
return;
}
@@ -624,7 +651,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Handle REG_SEQUENCE specially.
if (Opc == TargetOpcode::REG_SEQUENCE) {
- EmitRegSequence(Node, VRBaseMap);
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
return;
}
@@ -663,7 +690,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
for (unsigned i = NumSkip; i != NodeOperands; ++i)
AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
- VRBaseMap);
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
// Transfer all of the memory reference descriptions of this instruction.
MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
@@ -749,7 +776,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->getOperand(1).getValueType());
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
- DstTRC, SrcTRC);
+ DstTRC, SrcTRC, Node->getDebugLoc());
assert(Emitted && "Unable to issue a copy instruction!\n");
(void) Emitted;
break;
@@ -810,7 +837,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (; NumVals; --NumVals, ++i)
- AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
break;
}
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c7e7c71..02c044c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -65,7 +65,7 @@ class InstrEmitter {
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug = false);
+ bool IsDebug, bool IsClone, bool IsCloned);
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
@@ -75,11 +75,12 @@ class InstrEmitter {
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug = false);
+ bool IsDebug, bool IsClone, bool IsCloned);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
- void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
/// COPY_TO_REGCLASS is just a normal copy, except that the destination
@@ -90,7 +91,8 @@ class InstrEmitter {
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
- void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
public:
/// CountResults - The results of target nodes have register or immediate
/// operands first, then an optional chain, and optional flag operands
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bedfa57..62a37a5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -23,7 +23,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -2027,6 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler_rt. This implementation has the advantage
@@ -2052,6 +2052,41 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
+ // Implementation of unsigned i64 to f32. This implementation has the
+ // advantage of performing rounding correctly.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ EVT SHVT = TLI.getShiftAmountTy();
+
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0));
+
+ }
+
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
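A standalone rendering of the DAG sequence above makes the rounding argument easier to check: values of 2^53 and up are first rounded to odd in bit 11, so the two later roundings (the f64 sum, then the f64-to-f32 round) cannot double-round incorrectly. A sketch, not from the commit:

    #include <cstdint>

    float u64ToF32(uint64_t x) {
      // Round to odd: at or above 2^53, if any of the low 11 bits are set,
      // clear them and force bit 11, so the value stays visibly inexact and
      // no later rounding can treat it as an exact halfway case.
      if (x >= UINT64_C(0x0020000000000000) && (x & UINT64_C(0x7ff)))
        x = (x & ~UINT64_C(0x7ff)) | UINT64_C(0x800);
      // Convert in two exact 32-bit halves via f64, then round once to f32.
      double hi = (double)(uint32_t)(x >> 32) * 4294967296.0; // * 2^32, exact
      double lo = (double)(uint32_t)x;                        // exact
      return (float)(hi + lo);
    }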
@@ -2488,6 +2523,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
+ if (getTypeAction(EltVT) == Promote)
+ EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 548454c..8b382bc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2314,13 +2314,29 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
}
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
- if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DstVT. Check that the mantissa
+ // size of DstVT is at least the number of bits in SrcVT minus 1.
+ const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
// Do a signed conversion then adjust the result.
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
SignedConv = TLI.LowerOperation(SignedConv, DAG);
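A worked instance of the new check: IEEEsingle has a 24-bit significand and IEEEdouble 53 bits, so with SrcVT = i32 the signed-convert-then-adjust shortcut is legal into f64 (53 >= 31) but not into f32 (24 < 31), where some i32 values are inexact. In sketch form (the constants are the standard IEEE-754 significand widths):

    // semanticsPrecision(IEEEsingle) == 24; semanticsPrecision(IEEEdouble) == 53
    bool shortcutLegal(unsigned dstMantissaBits, unsigned srcBits) {
      return dstMantissaBits >= srcBits - 1; // every signed SrcVT value exact
    }
    // shortcutLegal(53, 32) -> true   (i32 -> f64)
    // shortcutLegal(24, 32) -> false  (i32 -> f32)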
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d60ad60..c665963 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -32,7 +32,7 @@ namespace llvm {
/// involves promoting small sizes to large sizes or splitting up large values
/// into small values.
///
-class VISIBILITY_HIDDEN DAGTypeLegalizer {
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
const TargetLowering &TLI;
SelectionDAG &DAG;
public:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index b92a672..56f5ded 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -30,7 +30,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index da02850..820ba66 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -53,6 +52,12 @@ static RegisterScheduler
"order when possible",
createSourceListDAGScheduler);
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up rr list scheduling which avoid stalls for "
+ "long latency instructions",
+ createHybridListDAGScheduler);
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -64,6 +69,10 @@ private:
/// it is top-down.
bool isBottomUp;
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
/// AvailableQueue - The priority queue to use for the available SUnits.
SchedulingPriorityQueue *AvailableQueue;
@@ -80,9 +89,9 @@ private:
public:
ScheduleDAGRRList(MachineFunction &mf,
- bool isbottomup,
+ bool isbottomup, bool needlatency,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+ : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
AvailableQueue(availqueue), Topo(SUnits) {
}
@@ -161,9 +170,11 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Return true, since register-pressure-reducing
- /// scheduling doesn't need actual latency information.
- bool ForceUnitLatencies() const { return true; }
+ /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool ForceUnitLatencies() const {
+ return !NeedLatency;
+ }
};
} // end anonymous namespace
@@ -213,6 +224,12 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
+ if (!ForceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
// If all the node's successors are scheduled, this node is ready
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
@@ -244,10 +261,15 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
- assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Handle noop hazard.
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
@@ -339,6 +361,7 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
SU->isAvailable = false;
UnscheduleNodeBottomUp(OldSU);
--CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
@@ -386,7 +409,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
- DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -504,7 +527,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
@@ -786,7 +809,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -795,7 +818,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
NewDef = Copies.back();
}
- DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
@@ -821,6 +844,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
if (CurSU)
ScheduleNodeBottomUp(CurSU, CurCycle);
++CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
}
// Reverse the order if it is bottom up.
@@ -889,6 +913,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
unsigned CurCycle = 0;
+ AvailableQueue->setCurCycle(CurCycle);
// Release any successors of the special Entry node.
ReleaseSuccessors(&EntrySU);
@@ -911,6 +936,7 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
if (CurSU)
ScheduleNodeTopDown(CurSU, CurCycle);
++CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
}
#ifndef NDEBUG
@@ -956,6 +982,16 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+
+ struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
+ hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
} // end anonymous namespace
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
@@ -990,8 +1026,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
namespace {
template<class SF>
class RegReductionPriorityQueue : public SchedulingPriorityQueue {
- PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
- unsigned currentQueueId;
+ std::vector<SUnit*> Queue;
+ SF Picker;
+ unsigned CurQueueId;
protected:
// SUnits - The SUnits for the current graph.
@@ -1007,7 +1044,7 @@ namespace {
public:
RegReductionPriorityQueue(const TargetInstrInfo *tii,
const TargetRegisterInfo *tri)
- : Queue(SF(this)), currentQueueId(0),
+ : Picker(this), CurQueueId(0),
TII(tii), TRI(tri), scheduleDAG(NULL) {}
void initNodes(std::vector<SUnit> &sunits) {
@@ -1067,26 +1104,26 @@ namespace {
unsigned getNodeOrdering(const SUnit *SU) const {
return scheduleDAG->DAG->GetOrdering(SU->getNode());
}
-
- unsigned size() const { return Queue.size(); }
bool empty() const { return Queue.empty(); }
void push(SUnit *U) {
assert(!U->NodeQueueId && "Node in the queue already");
- U->NodeQueueId = ++currentQueueId;
- Queue.push(U);
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
}
- void push_all(const std::vector<SUnit *> &Nodes) {
- for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
- push(Nodes[i]);
- }
-
SUnit *pop() {
if (empty()) return NULL;
- SUnit *V = Queue.top();
- Queue.pop();
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
V->NodeQueueId = 0;
return V;
}
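
The change above replaces the PriorityQueue member with a flat std::vector that pop() scans in full. The motive is that the new hybrid picker reads mutable scheduler state (the current cycle), so a binary heap's cached ordering would go stale; a linear scan re-evaluates the comparator on every pop. A minimal standalone sketch of the scan-then-swap-with-back idiom (Item, worse, and popBest are made-up names, not LLVM APIs):

#include <algorithm>
#include <vector>

struct Item { int Priority; };

// Mirrors how Picker(*Best, *I) works above: returning true means the
// first argument loses and the second becomes the new best candidate.
static bool worse(const Item *A, const Item *B) {
  return A->Priority < B->Priority;
}

// Scan for the best element, swap it to the back, then pop_back so the
// removal itself is O(1) and never shifts the rest of the vector.
static Item *popBest(std::vector<Item*> &Q) {
  if (Q.empty()) return 0;
  std::vector<Item*>::iterator Best = Q.begin();
  for (std::vector<Item*>::iterator I = Best + 1, E = Q.end(); I != E; ++I)
    if (worse(*Best, *I))
      Best = I;
  Item *V = *Best;
  std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

The same swap-with-back trick appears in the new remove() below; queue order is sacrificed, which is harmless because every pop() re-scans anyway.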
@@ -1094,7 +1131,11 @@ namespace {
void remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
- Queue.erase_one(SU);
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
SU->NodeQueueId = 0;
}
@@ -1117,6 +1158,9 @@ namespace {
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+ HybridBURRPriorityQueue;
}
/// closestSucc - Returns the scheduled cycle of the successor which is
@@ -1203,7 +1247,7 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
}
// Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1215,6 +1259,25 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
return BURRSort(left, right, SPQ);
}
+bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+ bool LStall = left->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < left->getHeight();
+ bool RStall = right->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < right->getHeight();
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling both of the nodes will cause pipeline stalls, sort them
+ // according to their height.
+ // If neither will cause a pipeline stall, try to reduce register pressure.
+ if (LStall) {
+ if (!RStall)
+ return true;
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ } else if (RStall)
+ return false;
+ return BURRSort(left, right, SPQ);
+}
+
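
The comparator above penalizes stalls first: a node that asked for latency scheduling (Sched::Latency) and whose height exceeds the current cycle would stall the pipeline if issued now. Only when both candidates stall does height decide, and when neither does, register pressure (BURRSort) takes over. A self-contained restatement of that decision table; Node, hybridLess, and the test values are illustrative, and the BURRSort fallback is stubbed to false:

#include <cassert>

struct Node {
  bool WantsLatency;  // stands in for SchedulingPref == Sched::Latency
  unsigned Height;    // critical-path height in cycles
};

// Mirrors hybrid_ls_rr_sort::operator(): returning true means 'left'
// loses and gets scheduled later.
static bool hybridLess(const Node &left, const Node &right,
                       unsigned curCycle) {
  bool LStall = left.WantsLatency && curCycle < left.Height;
  bool RStall = right.WantsLatency && curCycle < right.Height;
  if (LStall) {
    if (!RStall)
      return true;                       // only left stalls: delay left
    if (left.Height != right.Height)
      return left.Height > right.Height; // both stall: delay the taller
  } else if (RStall)
    return false;                        // only right stalls: delay right
  return false;                          // neither stalls: register pressure
}

int main() {
  Node a = { true, 8 }, b = { false, 2 };
  assert(hybridLess(a, b, 4));   // at cycle 4, a would stall and b would not
  assert(!hybridLess(a, b, 8));  // at cycle 8 neither stalls
  return 0;
}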
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
@@ -1379,8 +1442,8 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
- << " next to PredSU # " << PredSU->NodeNum
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
<< " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
@@ -1469,7 +1532,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
(hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
- DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
<< SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -1563,8 +1626,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
- ScheduleDAGRRList *SD =
- new ScheduleDAGRRList(*IS->MF, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
@@ -1577,8 +1639,7 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
- ScheduleDAGRRList *SD =
- new ScheduleDAGRRList(*IS->MF, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
@@ -1591,8 +1652,20 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
- ScheduleDAGRRList *SD =
- new ScheduleDAGRRList(*IS->MF, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
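
All four factories now pass two positional bools (bottom-up? track latency?) to the ScheduleDAGRRList constructor, which is easy to misread at the call sites. A hedged sketch of a named-flag wrapper; SchedKind and makeRRList are hypothetical, and the queue parameter type is assumed to be the SchedulingPriorityQueue base the real constructor takes:

// Hypothetical helper, not part of the patch: name the two flags once.
enum SchedKind { BURR, TDRR, SourceOrder, Hybrid };

static ScheduleDAGRRList *makeRRList(MachineFunction &MF, SchedKind K,
                                     SchedulingPriorityQueue *PQ) {
  bool isBottomUp  = (K != TDRR);   // only the TD scheduler goes top-down
  bool needLatency = (K == Hybrid); // only hybrid tracks cycle counts
  return new ScheduleDAGRRList(MF, isBottomUp, needLatency, PQ);
}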
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 76e4771..3185c88 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/ADT/DenseMap.h"
@@ -44,6 +45,24 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
ScheduleDAG::Run(bb, insertPos);
}
+/// NewSUnit - Creates a new SUnit and returns a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
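
NewSUnit moved out of line (see the header diff below), keeping its NDEBUG check that push_back never reallocated the SUnits vector: the scheduler hands out raw SUnit pointers, and a reallocation would leave them dangling, which is why BuildSchedUnits reserves NumNodes * 2 slots up front. A standalone illustration of why reserve() makes those pointers stable (plain C++, not LLVM code):

#include <cassert>
#include <vector>

struct SU { int Id; };

int main() {
  std::vector<SU> Units;
  Units.reserve(8);            // fix the capacity up front
  Units.push_back(SU());
  SU *Stable = &Units[0];      // pointer handed out to a client
  for (int i = 1; i < 8; ++i)
    Units.push_back(SU());     // stays within capacity: no reallocation
  assert(Stable == &Units[0] && "would fire had the buffer moved");
  return 0;
}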
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
@@ -52,6 +71,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
}
@@ -217,9 +237,6 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
- // Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
-
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
@@ -282,10 +299,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
N->setNodeId(NodeSUnit->NodeNum);
// Assign the Latency field of NodeSUnit using target-provided information.
- if (UnitLatencies)
- NodeSUnit->Latency = 1;
- else
- ComputeLatency(NodeSUnit);
+ ComputeLatency(NodeSUnit);
}
}
@@ -353,7 +367,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+ ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -377,7 +391,17 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty()) {
+ SU->Latency = 1;
+ return;
+ }
// Compute the latency for the node. We use the sum of the latencies for
// all nodes flagged together into this SUnit.
@@ -389,6 +413,37 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
}
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies())
+ return;
+
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (DefIdx >= II.getNumDefs())
+ return;
+ int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
+ if (DefCycle < 0)
+ return;
+ const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+ int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ if (UseCycle >= 0) {
+ int Latency = DefCycle - UseCycle + 1;
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+ }
+}
+
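
The new ComputeOperandLatency refines a data edge from per-operand itinerary cycles: the def makes its result available DefCycle cycles into its execution, the use reads the operand UseCycle cycles into its own, so DefCycle - UseCycle + 1 cycles must separate the two issue points. A small worked sketch of just the formula, with invented cycle numbers rather than any real target's itinerary:

#include <cassert>

// operandLatency is a made-up name; negative results are discarded and
// the edge keeps its default latency, exactly as the code above does.
static int operandLatency(int DefCycle, int UseCycle, int Default) {
  int Latency = DefCycle - UseCycle + 1;
  return Latency >= 0 ? Latency : Default;
}

int main() {
  assert(operandLatency(3, 1, 1) == 3); // result ready cycle 3, read cycle 1
  assert(operandLatency(0, 2, 1) == 1); // read late enough: default stands
  return 0;
}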
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 7ae8ec2..e8714ba 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -66,18 +66,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and returns a ptr to it.
///
- SUnit *NewSUnit(SDNode *N) {
-#ifndef NDEBUG
- const SUnit *Addr = 0;
- if (!SUnits.empty())
- Addr = &SUnits[0];
-#endif
- SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
- "SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
- return &SUnits.back();
- }
+ SUnit *NewSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -94,6 +83,15 @@ namespace llvm {
///
virtual void ComputeLatency(SUnit *SU);
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information.
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const { }
+
+ virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
virtual MachineBasicBlock *EmitSchedule();
/// Schedule - Order nodes according to selected style, filling
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e6df742..38bf68b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -15,6 +15,7 @@
#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
#include "llvm/Constants.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Function.h"
#include "llvm/GlobalAlias.h"
@@ -32,6 +33,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -789,7 +791,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
- : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
+ : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+ FLI(fli),
EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
@@ -963,8 +966,18 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
EVT EltVT = VT.getScalarType();
if (EltVT==MVT::f32)
return getConstantFP(APFloat((float)Val), VT, isTarget);
- else
+ else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ bool ignored;
+ APFloat apf = APFloat(Val);
+ apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(apf, VT, isTarget);
+ } else {
+ assert(0 && "Unsupported type in getConstantFP");
+ return SDValue();
+ }
}
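
For f80 and f128 no host double carries the right semantics, so the new branch builds an IEEE-double APFloat and converts it in place. The same pattern in isolation (toX87Extended is a made-up wrapper; x87DoubleExtended is APFloat's f80 semantics object, assuming LLVM's headers of this era):

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// Convert a host double to x87 80-bit extended precision, mirroring the
// f80 path above (sketch only, outside SelectionDAG).
static APFloat toX87Extended(double Val) {
  bool ignored;
  APFloat apf(Val);                        // starts as IEEEdouble
  apf.convert(APFloat::x87DoubleExtended,  // f80 target semantics
              APFloat::rmNearestTiesToEven, &ignored);
  return apf;
}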
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
@@ -2614,7 +2627,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
break;
case ISD::AND:
- assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
@@ -2627,7 +2641,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
- assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
@@ -2642,7 +2657,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SDIV:
case ISD::SREM:
assert(VT.isInteger() && "This operator does not apply to FP types!");
- // fall through
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -2665,6 +2682,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
return N1;
}
}
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
@@ -3525,7 +3543,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
isVol, AlwaysInline,
DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.getNode())
@@ -3590,7 +3608,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.getNode())
return Result;
@@ -3641,7 +3659,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
DstSV, DstSVOff);
if (Result.getNode())
return Result;
@@ -5417,6 +5435,8 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
+ assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
}
@@ -5607,6 +5627,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LSDAADDR: return "LSDAADDR";
case ISD::EHSELECTION: return "EHSELECTION";
case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -6008,6 +6030,21 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
}
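
The tail just added to print_details renders a node's debug location as dbg:<file>:<line>[:<col>], falling back to <unknown> when the scope does not verify and omitting a zero column. The same formatting rule as a plain standalone sketch (formatDbg is a made-up name):

#include <iostream>
#include <sstream>
#include <string>

static std::string formatDbg(const std::string &File, unsigned Line,
                             unsigned Col) {
  std::ostringstream OS;
  OS << " dbg:" << (File.empty() ? "<unknown>" : File) << ':' << Line;
  if (Col != 0)                  // a zero column is omitted, as above
    OS << ':' << Col;
  return OS.str();
}

int main() {
  std::cout << formatDbg("foo.c", 42, 7) << '\n';  //  dbg:foo.c:42:7
  std::cout << formatDbg("", 10, 0) << '\n';       //  dbg:<unknown>:10
  return 0;
}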
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38b204..fbe601f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3726,6 +3726,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
return true;
}
+// Visual Studio defines setjmp as _setjmp.
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
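
For context: MSVC's <setjmp.h> defines setjmp as a macro expanding to _setjmp, which would otherwise mangle the bare identifier setjmp where it appears later in this file. The guard above removes the macro for the rest of the translation unit; where that is too blunt, MSVC and recent GCCs also accept push_macro/pop_macro, sketched here as an alternative the patch does not use:

#pragma push_macro("setjmp")   // save the current definition, if any
#undef setjmp
/* ... code that needs the bare identifier 'setjmp' ... */
#pragma pop_macro("setjmp")    // restore <setjmp.h>'s macro afterwards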
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
/// we want to emit this as a call to a named external function, return the name
/// otherwise lower it and return null.
@@ -3818,7 +3824,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+ if (!DIVariable(DI.getVariable()).Verify())
return 0;
MDNode *Variable = DI.getVariable();
@@ -3881,7 +3887,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
- if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+ if (!DIVariable(DI.getVariable()).Verify())
return 0;
MDNode *Variable = DI.getVariable();
@@ -3900,6 +3906,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, 0, false);
} else {
+ bool createUndef = false;
+ // FIXME: Why not use getValue() directly?
SDValue &N = NodeMap[V];
if (N.getNode()) {
if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
@@ -3907,7 +3915,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else {
+ } else if (isa<PHINode>(V) && !V->use_empty()) {
+ SDValue N = getValue(V);
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else
+ createUndef = true;
+ } else
+ createUndef = true;
+ if (createUndef) {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter; we need this fallback.
SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
@@ -4018,6 +4038,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setCurrentCallSite(CI->getZExtValue());
return 0;
}
+ case Intrinsic::eh_sjlj_setjmp: {
+ setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+ getValue(I.getOperand(1))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(),
+ getValue(I.getOperand(1))));
+ return 0;
+ }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
@@ -4924,7 +4955,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
namespace llvm {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 422cb7a..65b8d4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -25,9 +25,11 @@
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -131,11 +133,13 @@ namespace llvm {
if (OptLevel == CodeGenOpt::None)
return createFastDAGScheduler(IS, OptLevel);
- if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+ if (TLI.getSchedulingPreference() == Sched::Latency)
return createTDListDAGScheduler(IS, OptLevel);
- assert(TLI.getSchedulingPreference() ==
- TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
- return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+ "Unknown sched type!");
+ return createHybridListDAGScheduler(IS, OptLevel);
}
}
@@ -188,6 +192,39 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
+/// another function that gcc recognizes as "returning twice". This is used to
+/// limit code-gen optimizations on the machine function.
+///
+/// FIXME: Remove after <rdar://problem/8031714> is fixed.
+static bool FunctionCallsSetJmp(const Function *F) {
+ const Module *M = F->getParent();
+ static const char *ReturnsTwiceFns[] = {
+ "setjmp",
+ "sigsetjmp",
+ "setjmp_syscall",
+ "savectx",
+ "qsetjmp",
+ "vfork",
+ "getcontext"
+ };
+#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
+
+ for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
+ if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+ if (!Callee->use_empty())
+ for (Value::const_use_iterator
+ I = Callee->use_begin(), E = Callee->use_end();
+ I != E; ++I)
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->getParent()->getParent() == F)
+ return true;
+ }
+
+ return false;
+#undef NUM_RETURNS_TWICE_FNS
+}
+
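
A side note on NUM_RETURNS_TWICE_FNS above: the count can also come from llvm::array_lengthof in llvm/ADT/STLExtras.h, which deduces the length from the array type. A sketch under that assumption (KnownFns and numKnownFns are illustrative names):

#include <cstddef>
#include "llvm/ADT/STLExtras.h"

static const char *const KnownFns[] = {
  "setjmp", "sigsetjmp", "vfork"   // abbreviated list, illustration only
};

// Unlike the sizeof/sizeof macro, array_lengthof cannot be applied to a
// pointer by mistake and needs no parentheses at its expansion sites.
static std::size_t numKnownFns() {
  return llvm::array_lengthof(KnownFns);
}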
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -218,6 +255,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *EntryMBB = MF->begin();
RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
// Insert DBG_VALUE instructions for function arguments to the entry block.
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
@@ -230,8 +274,44 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// FIXME: VR def may not be in entry block.
Def->getParent()->insert(llvm::next(InsertPos), MI);
}
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ unsigned Offset = MI->getOperand(1).getImm();
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(LDI->second, RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (!MFI->hasCalls()) {
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator
+ II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+ if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+ MFI->setHasCalls(true);
+ goto done;
+ }
+ }
+ }
+ done:;
}
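
The scan above uses goto done to leave both loop levels at once. The same logic reads a little more directly as a helper that returns on the first qualifying instruction; machineFunctionHasCalls is a made-up name and the patch itself keeps the goto form:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// True on the first inline asm or real (non-return) call in the function.
static bool machineFunctionHasCalls(const MachineFunction &MF,
                                    const TargetInstrInfo &TII) {
  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
       I != E; ++I)
    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
         II != IE; ++II) {
      const TargetInstrDesc &TID = TII.get(II->getOpcode());
      if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn()))
        return true;
    }
  return false;
}

The caller would then write MFI->setHasCalls(machineFunctionHasCalls(*MF, *TII)), hypothetically.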
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -662,6 +742,7 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->setRoot(Dummy.getValue());
}
+
DEBUG(errs() << "===== Instruction selection ends:\n");
PostprocessISelDAG();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8a4a1b1..44a80d3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -18,7 +18,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -544,7 +543,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
- SchedPreferenceInfo = SchedulingForLatency;
+ SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
IfCvtBlockSizeLimit = 2;
@@ -2417,7 +2416,7 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint[0] != '{')
- return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
@@ -2449,7 +2448,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
- return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index d20477f..a081e3c 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-TargetSelectionDAGInfo::TargetSelectionDAGInfo() {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getTargetData()) {
}
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {