| author | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000 |
| --- | --- | --- |
| committer | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000 |
| commit | 1e3dec662ea18131c495db50caccc57f77b7a5fe (patch) | |
| tree | 9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/CodeGen/SelectionDAG | |
| parent | 377552607e51dc1d3e6ff33833f9620bcfe815ac (diff) | |
| download | FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.zip | FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.tar.gz |
Update LLVM to r104832.
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
| -rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 144 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/FastISel.cpp | 262 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 98 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/InstrEmitter.h | 10 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 39 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 18 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 1 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 147 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 71 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 22 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 53 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 39 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 89 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 7 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp | 4 |
16 files changed, 785 insertions, 221 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3639f80..6bddd78 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -760,12 +760,18 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
     bool Replace1 = false;
     SDValue N1 = Op.getOperand(1);
-    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
-    if (NN1.getNode() == 0)
-      return SDValue();
+    SDValue NN1;
+    if (N0 == N1)
+      NN1 = NN0;
+    else {
+      NN1 = PromoteOperand(N1, PVT, Replace1);
+      if (NN1.getNode() == 0)
+        return SDValue();
+    }
 
     AddToWorkList(NN0.getNode());
-    AddToWorkList(NN1.getNode());
+    if (NN1.getNode())
+      AddToWorkList(NN1.getNode());
 
     if (Replace0)
       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
@@ -3425,8 +3431,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
     if (NarrowLoad.getNode()) {
-      if (NarrowLoad.getNode() != N0.getNode())
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
         CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
@@ -3564,7 +3574,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
         DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
-      return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); 
+      return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
     }
   }
@@ -3585,9 +3595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
                        N0.getOperand(0), N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                        NegOne, DAG.getConstant(0, VT));
-  }
-
-
+  }
 
   // fold (sext x) -> (zext x) if the sign bit is known zero.
   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3615,8 +3623,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   if (N0.getOpcode() == ISD::TRUNCATE) {
     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
     if (NarrowLoad.getNode()) {
-      if (NarrowLoad.getNode() != N0.getNode())
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
         CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
       return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
     }
   }
@@ -3726,8 +3738,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     }
   }
 
-  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   if (N0.getOpcode() == ISD::SETCC) {
+    if (!LegalOperations && VT.isVector()) {
+      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
+      // Only do this before legalize for now.
+      EVT N0VT = N0.getOperand(0).getValueType();
+      EVT EltVT = VT.getVectorElementType();
+      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+                                    DAG.getConstant(1, EltVT));
+      if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+        // We know that the # elements of the results is the same as the
+        // # elements of the compare (and the # elements of the compare result
+        // for that matter). Check to see that they are the same size. If so,
+        // we know that the element size of the sext'd result matches the
+        // element size of the compare operands.
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                           DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                                         N0.getOperand(1),
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                                       &OneOps[0], OneOps.size()));
+      } else {
+        // If the desired elements are smaller or larger than the source
+        // elements we can use a matching integer vector type and then
+        // truncate/sign extend
+        EVT MatchingElementType =
+          EVT::getIntegerVT(*DAG.getContext(),
+                            N0VT.getScalarType().getSizeInBits());
+        EVT MatchingVectorType =
+          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                           N0VT.getVectorNumElements());
+        SDValue VsetCC =
+          DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+                        N0.getOperand(1),
+                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                           DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                                       &OneOps[0], OneOps.size()));
+      }
+    }
+
+    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
     SDValue SCC =
       SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                        DAG.getConstant(1, VT), DAG.getConstant(0, VT),
@@ -3780,8 +3832,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   if (N0.getOpcode() == ISD::TRUNCATE) {
     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
     if (NarrowLoad.getNode()) {
-      if (NarrowLoad.getNode() != N0.getNode())
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
         CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
       return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
     }
   }
@@ -3883,8 +3939,39 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }
 
-  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   if (N0.getOpcode() == ISD::SETCC) {
+    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+    // Only do this before legalize for now.
+    if (VT.isVector() && !LegalOperations) {
+      EVT N0VT = N0.getOperand(0).getValueType();
+      // We know that the # elements of the results is the same as the
+      // # elements of the compare (and the # elements of the compare result
+      // for that matter). Check to see that they are the same size. If so,
+      // we know that the element size of the sext'd result matches the
+      // element size of the compare operands.
+      if (VT.getSizeInBits() == N0VT.getSizeInBits())
+        return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                             N0.getOperand(1),
+                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
+      // If the desired elements are smaller or larger than the source
+      // elements we can use a matching integer vector type and then
+      // truncate/sign extend
+      else {
+        EVT MatchingElementType =
+          EVT::getIntegerVT(*DAG.getContext(),
+                            N0VT.getScalarType().getSizeInBits());
+        EVT MatchingVectorType =
+          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                           N0VT.getVectorNumElements());
+        SDValue VsetCC =
+          DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+                        N0.getOperand(1),
+                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
+        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+      }
+    }
+
+    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
     SDValue SCC =
       SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                        DAG.getConstant(1, VT), DAG.getConstant(0, VT),
@@ -5278,10 +5365,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
       SDValue Offset;
       ISD::MemIndexedMode AM = ISD::UNINDEXED;
       if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
-        if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
-          std::swap(BasePtr, Offset);
-        if (Ptr != BasePtr)
-          continue;
         // Don't create a indexed load / store with zero offset.
         if (isa<ConstantSDNode>(Offset) &&
             cast<ConstantSDNode>(Offset)->isNullValue())
@@ -5953,6 +6036,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   SDValue InVal = N->getOperand(1);
   SDValue EltNo = N->getOperand(2);
 
+  // If the inserted element is an UNDEF, just use the input vector.
+  if (InVal.getOpcode() == ISD::UNDEF)
+    return InVec;
+
   // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
   // vector with the inserted element.
   if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6206,7 +6293,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   // all scalar elements the same.
   if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
     SDNode *V = N0.getNode();
-
     // If this is a bit convert that changes the element type of the vector but
     // not the number of vector elements, look through it.  Be careful not to
@@ -6338,13 +6424,21 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
       break;
     }
 
-    Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
-                              EltType, LHSOp, RHSOp));
-    AddToWorkList(Ops.back().getNode());
-    assert((Ops.back().getOpcode() == ISD::UNDEF ||
-            Ops.back().getOpcode() == ISD::Constant ||
-            Ops.back().getOpcode() == ISD::ConstantFP) &&
-           "Scalar binop didn't fold!");
+    // If the vector element type is not legal, the BUILD_VECTOR operands
+    // are promoted and implicitly truncated.  Make that explicit here.
+    if (LHSOp.getValueType() != EltType)
+      LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
+    if (RHSOp.getValueType() != EltType)
+      RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
+
+    SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+                                 LHSOp, RHSOp);
+    if (FoldOp.getOpcode() != ISD::UNDEF &&
+        FoldOp.getOpcode() != ISD::Constant &&
+        FoldOp.getOpcode() != ISD::ConstantFP)
+      break;
+    Ops.push_back(FoldOp);
+    AddToWorkList(FoldOp.getNode());
   }
 
   if (Ops.size() == LHS.getNumOperands()) {
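Editor's note: the two vector-setcc folds added above both rest on the convention that a vector compare produces all-ones in each true lane (unlike a scalar setcc's 0/1), so sign extension is free and zero extension only needs a mask of ones. A minimal scalar model of a single lane — the `vsetccLane` helper is illustrative, not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

// One lane of a vector setcc: true lanes are all-ones, false lanes are zero.
int32_t vsetccLane(int32_t a, int32_t b) { return a < b ? -1 : 0; }

int main() {
  for (int32_t a = -2; a <= 2; ++a)
    for (int32_t b = -2; b <= 2; ++b) {
      int32_t mask = vsetccLane(a, b);
      // sext(setcc) is the mask itself: a true lane is already all-ones.
      assert(mask == (a < b ? -1 : 0));
      // zext(setcc) -> and(vsetcc, splat(1)): a true lane becomes exactly 1.
      assert((mask & 1) == (a < b ? 1 : 0));
    }
  return 0;
}
```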
Halt "fast" selection and bail. - return 0; + return std::pair<unsigned, bool>(0, false); + + bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. MVT PtrVT = TLI.getPointerTy(); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); - if (IdxVT.bitsLT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); - else if (IdxVT.bitsGT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); - return IdxN; + if (IdxVT.bitsLT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + else if (IdxVT.bitsGT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -224,10 +266,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, CI->getZExtValue()); + ISDOpcode, Op0, Op0IsKill, + CI->getZExtValue()); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); @@ -238,7 +283,7 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant float. if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, CF); + ISDOpcode, Op0, Op0IsKill, CF); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); @@ -251,9 +296,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + // Now we have both operands in registers. Emit the instruction. unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op1); + ISDOpcode, + Op0, Op0IsKill, + Op1, Op1IsKill); if (ResultReg == 0) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. @@ -270,6 +319,8 @@ bool FastISel::SelectGetElementPtr(const User *I) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + const Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -282,10 +333,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); // FIXME: This can be optimized by combining the add with a // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
return false; + NIsKill = true; } Ty = StTy->getElementType(Field); } else { @@ -296,27 +348,31 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + NIsKill = true; continue; } // N = N + Idx * ElementSize; uint64_t ElementSize = TD.getTypeAllocSize(Ty); - unsigned IdxN = getRegForGEPIndex(Idx); + std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + IdxNIsKill = true; } - N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -338,7 +394,7 @@ bool FastISel::SelectCall(const User *I) { default: break; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); - if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) || + if (!DIVariable(DI->getVariable()).Verify() || !MF.getMMI().hasDebugInfo()) return true; @@ -402,7 +458,7 @@ bool FastISel::SelectCall(const User *I) { const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC); + Reg, RC, RC, DL); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; UpdateValueMap(I, ResultReg); @@ -432,17 +488,19 @@ bool FastISel::SelectCall(const User *I) { const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC); + RC, RC, DL); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; + bool ResultRegIsKill = hasTrivialKill(I); + // Cast the register to the type of the selector. if (SrcVT.bitsGT(MVT::i32)) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg); + ResultReg, ResultRegIsKill); else if (SrcVT.bitsLT(MVT::i32)) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg); + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); if (ResultReg == 0) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -490,12 +548,15 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); - InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); if (!InputReg) return false; + InputRegIsKill = true; } // If the result is i1, truncate to the target's type for i1 first. 
if (DstVT == MVT::i1) @@ -504,7 +565,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, - InputReg); + InputReg, InputRegIsKill); if (!ResultReg) return false; @@ -536,6 +597,8 @@ bool FastISel::SelectBitCast(const User *I) { if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; @@ -545,7 +608,7 @@ bool FastISel::SelectBitCast(const User *I) { ResultReg = createResultReg(DstClass); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Op0, DstClass, SrcClass); + Op0, DstClass, SrcClass, DL); if (!InsertedCopy) ResultReg = 0; } @@ -553,7 +616,7 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BIT_CONVERT opcode. if (!ResultReg) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BIT_CONVERT, Op0); + ISD::BIT_CONVERT, Op0, Op0IsKill); if (!ResultReg) return false; @@ -609,10 +672,12 @@ FastISel::SelectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); if (OpReg == 0) return false; + bool OpRegIsKill = hasTrivialKill(I); + // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(I->getType()); unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), - ISD::FNEG, OpReg); + ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg != 0) { UpdateValueMap(I, ResultReg); return true; @@ -626,18 +691,19 @@ FastISel::SelectFNeg(const User *I) { return false; unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::BIT_CONVERT, OpReg); + ISD::BIT_CONVERT, OpReg, OpRegIsKill); if (IntReg == 0) return false; - unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg, + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, + IntReg, /*Kill=*/true, UINT64_C(1) << (VT.getSizeInBits()-1), IntVT.getSimpleVT()); if (IntResultReg == 0) return false; ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BIT_CONVERT, IntResultReg); + ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); if (ResultReg == 0) return false; @@ -782,7 +848,8 @@ FastISel::FastISel(MachineFunction &mf, TM(MF.getTarget()), TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()) { + TLI(*TM.getTargetLowering()), + IsBottomUp(false) { } FastISel::~FastISel() {} @@ -793,13 +860,15 @@ unsigned FastISel::FastEmit_(MVT, MVT, } unsigned FastISel::FastEmit_r(MVT, MVT, - unsigned, unsigned /*Op0*/) { + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/) { return 0; } unsigned FastISel::FastEmit_rr(MVT, MVT, - unsigned, unsigned /*Op0*/, - unsigned /*Op0*/) { + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, + unsigned /*Op1*/, bool /*Op1IsKill*/) { return 0; } @@ -813,20 +882,23 @@ unsigned FastISel::FastEmit_f(MVT, MVT, } unsigned FastISel::FastEmit_ri(MVT, MVT, - unsigned, unsigned /*Op0*/, + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, uint64_t /*Imm*/) { return 0; } unsigned FastISel::FastEmit_rf(MVT, MVT, - unsigned, unsigned /*Op0*/, + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, const ConstantFP * /*FPImm*/) { return 0; } unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned, - unsigned /*Op0*/, unsigned /*Op1*/, + unsigned /*Op0*/, bool /*Op0IsKill*/, + unsigned /*Op1*/, bool /*Op1IsKill*/, uint64_t /*Imm*/) { return 0; } @@ -836,16 +908,18 @@ unsigned 
FastISel::FastEmit_rri(MVT, MVT, /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, uint64_t Imm, - MVT ImmType) { + unsigned Op0, bool Op0IsKill, + uint64_t Imm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); + unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); if (MaterialReg == 0) return 0; - return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); } /// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries @@ -853,10 +927,10 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, const ConstantFP *FPImm, - MVT ImmType) { + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. - unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); + unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); if (ResultReg != 0) return ResultReg; @@ -886,11 +960,13 @@ unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, if (IntegerReg == 0) return 0; MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg); + ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); if (MaterialReg == 0) return 0; } - return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); } unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { @@ -908,16 +984,16 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0) { + unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0); + BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0); + BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -927,16 +1003,21 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1) { + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - 
II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -945,16 +1026,21 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, uint64_t Imm) { + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); else { - BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -963,16 +1049,21 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, const ConstantFP *FPImm) { + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); else { - BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -981,16 +1072,24 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); else { - BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -1008,7 +1107,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, else { BuildMI(MBB, DL, II).addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -1016,18 +1115,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, } unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, uint32_t Idx) { + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { const TargetRegisterClass* RC = MRI.getRegClass(Op0); unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); if (II.getNumDefs() >= 
1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Idx); else { - BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Idx); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -1036,8 +1140,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { - return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. @@ -1070,6 +1174,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + // Ignore dead phi's. if (PN->use_empty()) continue; @@ -1092,12 +1197,19 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + // Set the DebugLoc for the copy. Prefer the location of the operand + // if there is one; use the location of the PHI otherwise. + DL = PN->getDebugLoc(); + if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) + DL = Inst->getDebugLoc(); + unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + DL = DebugLoc(); } } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c5dae82..16eb8a7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -143,7 +143,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, - DstRC, SrcRC); + DstRC, SrcRC, Node->getDebugLoc()); assert(Emitted && "Unable to issue a copy instruction!\n"); (void) Emitted; @@ -265,7 +265,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, - bool IsDebug) { + bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!"); @@ -289,7 +289,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC); + DstRC, SrcRC, Op.getNode()->getDebugLoc()); assert(Emitted && "Unable to issue a copy instruction!\n"); (void) Emitted; VReg = NewVReg; @@ -297,15 +297,25 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, } // If this value has only one use, that use is a kill. This is a - // conservative approximation. Tied operands are never killed, so we need - // to check that. 
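Editor's note: most of the FastISel churn above threads a new kill bit through the emitters, driven by `hasTrivialKill`. The rule is simple: a virtual register's last use can carry a kill flag without any liveness analysis only when the value has exactly one use and that use sits in the defining block. A rough standalone model, with invented field names (`SingleUser`, `Block`) that merely mirror the checks in the patch:

```cpp
#include <cassert>

struct Val {
  Val *SingleUser = nullptr; // set only when the value has exactly one use
  int Block = 0;             // id of the defining basic block
  bool IsNoopCast = false;   // bitcast/ptrtoint/inttoptr-style no-op cast
};

// True when the sole use is local to the defining block, so the emitted
// operand can be flagged as a kill without scanning liveness.
bool hasTrivialKillModel(const Val &V) {
  if (!V.SingleUser) return false;        // zero or multiple uses
  if (V.IsNoopCast) return false;         // no-op casts get coalesced away
  return V.SingleUser->Block == V.Block;  // last use must be in-block
}

int main() {
  Val Use; Use.Block = 1;
  Val Def; Def.Block = 1; Def.SingleUser = &Use;
  assert(hasTrivialKillModel(Def));
  Use.Block = 2;                          // use escapes to another block
  assert(!hasTrivialKillModel(Def));
  return 0;
}
```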
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5dae82..16eb8a7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -143,7 +143,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
       // Create the reg, emit the copy.
       VRBase = MRI->createVirtualRegister(DstRC);
       bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
-                                       DstRC, SrcRC);
+                                       DstRC, SrcRC, Node->getDebugLoc());
       assert(Emitted && "Unable to issue a copy instruction!\n");
       (void) Emitted;
 
@@ -265,7 +265,7 @@
 InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
                                  unsigned IIOpNum,
                                  const TargetInstrDesc *II,
                                  DenseMap<SDValue, unsigned> &VRBaseMap,
-                                 bool IsDebug) {
+                                 bool IsDebug, bool IsClone, bool IsCloned) {
   assert(Op.getValueType() != MVT::Other &&
          Op.getValueType() != MVT::Flag &&
          "Chain and flag operands should occur at end of operand list!");
@@ -289,7 +289,7 @@
     if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
       unsigned NewVReg = MRI->createVirtualRegister(DstRC);
       bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
-                                       DstRC, SrcRC);
+                                       DstRC, SrcRC, Op.getNode()->getDebugLoc());
       assert(Emitted && "Unable to issue a copy instruction!\n");
       (void) Emitted;
       VReg = NewVReg;
@@ -297,15 +297,25 @@
   }
 
   // If this value has only one use, that use is a kill. This is a
-  // conservative approximation. Tied operands are never killed, so we need
-  // to check that. And that means we need to determine the index of the
-  // operand.
-  unsigned Idx = MI->getNumOperands();
-  while (Idx > 0 &&
-         MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
-    --Idx;
-  bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
-  bool isKill = Op.hasOneUse() && !isTied && !IsDebug;
+  // conservative approximation. InstrEmitter does trivial coalescing
+  // with CopyFromReg nodes, so don't emit kill flags for them.
+  // Avoid kill flags on Schedule cloned nodes, since there will be
+  // multiple uses.
+  // Tied operands are never killed, so we need to check that. And that
+  // means we need to determine the index of the operand.
+  bool isKill = Op.hasOneUse() &&
+                Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+                !IsDebug &&
+                !(IsClone || IsCloned);
+  if (isKill) {
+    unsigned Idx = MI->getNumOperands();
+    while (Idx > 0 &&
+           MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+      --Idx;
+    bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+    if (isTied)
+      isKill = false;
+  }
 
   MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
                                            false/*isImp*/, isKill,
@@ -322,9 +332,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
                               unsigned IIOpNum,
                               const TargetInstrDesc *II,
                               DenseMap<SDValue, unsigned> &VRBaseMap,
-                              bool IsDebug) {
+                              bool IsDebug, bool IsClone, bool IsCloned) {
   if (Op.isMachineOpcode()) {
-    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
   } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
   } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -373,7 +384,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
     assert(Op.getValueType() != MVT::Other &&
            Op.getValueType() != MVT::Flag &&
            "Chain and flag operands should occur at end of operand list!");
-    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
   }
 }
 
@@ -395,7 +407,8 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC,
 /// EmitSubregNode - Generate machine code for subreg nodes.
 ///
 void InstrEmitter::EmitSubregNode(SDNode *Node,
-                                  DenseMap<SDValue, unsigned> &VRBaseMap){
+                                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                                  bool IsClone, bool IsCloned) {
   unsigned VRBase = 0;
   unsigned Opc = Node->getMachineOpcode();
@@ -439,7 +452,8 @@
     // Add def, source, and subreg index
     MI->addOperand(MachineOperand::CreateReg(VRBase, true));
-    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
     MI->addOperand(MachineOperand::CreateImm(SubIdx));
     MBB->insert(InsertPos, MI);
   } else if (Opc == TargetOpcode::INSERT_SUBREG ||
@@ -473,9 +487,11 @@
       const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
       MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
     } else
-      AddOperand(MI, N0, 0, 0, VRBaseMap);
+      AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
     // Add the subregster being inserted
-    AddOperand(MI, N1, 0, 0, VRBaseMap);
+    AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
     MI->addOperand(MachineOperand::CreateImm(SubIdx));
     MBB->insert(InsertPos, MI);
   } else
@@ -503,7 +519,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
   // Create the new VReg in the destination class and emit a copy.
   unsigned NewVReg = MRI->createVirtualRegister(DstRC);
   bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
-                                   DstRC, SrcRC);
+                                   DstRC, SrcRC, Node->getDebugLoc());
   assert(Emitted &&
          "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
   (void) Emitted;
@@ -517,7 +533,8 @@
 /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
 ///
 void InstrEmitter::EmitRegSequence(SDNode *Node,
-                                   DenseMap<SDValue, unsigned> &VRBaseMap) {
+                                   DenseMap<SDValue, unsigned> &VRBaseMap,
+                                   bool IsClone, bool IsCloned) {
   const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
   unsigned NewVReg = MRI->createVirtualRegister(RC);
   MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -528,17 +545,21 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
   const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
   for (unsigned i = 0; i != NumOps; ++i) {
     SDValue Op = Node->getOperand(i);
-#ifndef NDEBUG
     if (i & 1) {
       unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
       unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
-      const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
-      const TargetRegisterClass *SRC =
-        getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
-      assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE");
+      const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+      const TargetRegisterClass *SRC =
+        TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+      if (!SRC)
+        llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
+      if (SRC != RC) {
+        MRI->setRegClass(NewVReg, SRC);
+        RC = SRC;
+      }
     }
-#endif
-    AddOperand(MI, Op, i+1, &II, VRBaseMap);
+    AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
   }
 
   MBB->insert(InsertPos, MI);
@@ -579,11 +600,17 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
       MIB.addReg(0U);       // undef
     else
       AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
-                 true /*IsDebug*/);
+                 /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
   } else if (SD->getKind() == SDDbgValue::CONST) {
     const Value *V = SD->getConst();
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-      MIB.addImm(CI->getSExtValue());
+      // FIXME: SDDbgValues aren't updated with legalization, so it's possible
+      // to have i128 values in them at this point. As a crude workaround, just
+      // drop the debug info if this happens.
+      if (!CI->getValue().isSignedIntN(64))
+        MIB.addReg(0U);
+      else
+        MIB.addImm(CI->getSExtValue());
     } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
       MIB.addFPImm(CF);
     } else {
@@ -612,7 +639,7 @@
 EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
       Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
-    EmitSubregNode(Node, VRBaseMap);
+    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
     return;
   }
 
@@ -624,7 +651,7 @@
 
   // Handle REG_SEQUENCE specially.
   if (Opc == TargetOpcode::REG_SEQUENCE) {
-    EmitRegSequence(Node, VRBaseMap);
+    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
     return;
   }
 
@@ -663,7 +690,7 @@
   unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
   for (unsigned i = NumSkip; i != NodeOperands; ++i)
     AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
-               VRBaseMap);
+               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
 
   // Transfer all of the memory reference descriptions of this instruction.
   MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
@@ -749,7 +776,7 @@
 EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
                                         Node->getOperand(1).getValueType());
 
     bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
-                                     DstTRC, SrcTRC);
+                                     DstTRC, SrcTRC, Node->getDebugLoc());
     assert(Emitted && "Unable to issue a copy instruction!\n");
     (void) Emitted;
     break;
@@ -810,7 +837,8 @@
 EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
       // The addressing mode has been selected, just add all of the
       // operands to the machine instruction.
       for (; NumVals; --NumVals, ++i)
-        AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+        AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+                   /*IsDebug=*/false, IsClone, IsCloned);
       break;
     }
   }
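Editor's note: throughout these emitters the kill bit is applied with the `Op0IsKill * RegState::Kill` idiom. A `bool` promotes to 0 or 1, so the flag is either contributed whole or not at all, with no branch. A tiny sketch — the flag value below is made up for illustration, not LLVM's `RegState`:

```cpp
#include <cassert>

enum { KillFlag = 1u << 3 };  // hypothetical stand-in for RegState::Kill

unsigned regFlagsFor(bool IsKill) {
  return IsKill * KillFlag;   // bool promotes to 0 or 1, then scales the flag
}

int main() {
  assert(regFlagsFor(true)  == KillFlag);
  assert(regFlagsFor(false) == 0);
  return 0;
}
```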
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c7e7c71..02c044c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -65,7 +65,7 @@ class InstrEmitter {
                           unsigned IIOpNum,
                           const TargetInstrDesc *II,
                           DenseMap<SDValue, unsigned> &VRBaseMap,
-                          bool IsDebug = false);
+                          bool IsDebug, bool IsClone, bool IsCloned);
 
   /// AddOperand - Add the specified operand to the specified machine instr. II
   /// specifies the instruction information for the node, and IIOpNum is the
@@ -75,11 +75,12 @@ class InstrEmitter {
                   unsigned IIOpNum,
                   const TargetInstrDesc *II,
                   DenseMap<SDValue, unsigned> &VRBaseMap,
-                  bool IsDebug = false);
+                  bool IsDebug, bool IsClone, bool IsCloned);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
-  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+                      bool IsClone, bool IsCloned);
 
   /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
   /// COPY_TO_REGCLASS is just a normal copy, except that the destination
@@ -90,7 +91,8 @@ class InstrEmitter {
 
   /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
   ///
-  void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+  void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+                       bool IsClone, bool IsCloned);
 public:
   /// CountResults - The results of target nodes have register or immediate
   /// operands first, then an optional chain, and optional flag operands
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bedfa57..62a37a5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -23,7 +23,6 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtarget.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -2027,6 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     return Result;
   }
   assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  // Code below here assumes !isSigned without checking again.
 
   // Implementation of unsigned i64 to f64 following the algorithm in
   // __floatundidf in compiler_rt. This implementation has the advantage
@@ -2052,6 +2052,41 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
   }
 
+  // Implementation of unsigned i64 to f32. This implementation has the
+  // advantage of performing rounding correctly.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    EVT SHVT = TLI.getShiftAmountTy();
+
+    SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+         DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+         DAG.getConstant(UINT64_C(0x800), MVT::i64));
+    SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+         DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+    SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+                   And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+    SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+    SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+                   Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+                   ISD::SETUGE);
+    SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+
+    SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+                             DAG.getConstant(32, SHVT));
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+    SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+    SDValue TwoP32 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+    SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+    SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+    SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+    SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+    return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+                       DAG.getIntPtrConstant(0));
+
+  }
+
   SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
 
   SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
@@ -2488,6 +2523,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     EVT VT = Node->getValueType(0);
     EVT EltVT = VT.getVectorElementType();
+    if (getTypeAction(EltVT) == Promote)
+      EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
     unsigned NumElems = VT.getVectorNumElements();
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0; i != NumElems; ++i) {
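Editor's note: the new i64-to-f32 expansion is easier to follow as scalar code. When the value exceeds f64's 53-bit significand it squashes the low bits into a sticky bit ("round to odd") so the later f64-to-f32 rounding cannot double-round, then converts the two 32-bit halves via f64 and rounds once. This sketch mirrors the node sequence above; it is not LLVM code:

```cpp
#include <cstdint>
#include <cstdio>

float Uint64ToFloat(uint64_t X) {
  uint64_t Sel2 = X;
  if (X >= UINT64_C(0x0020000000000000)) {            // >= 2^53: f64 is lossy
    uint64_t And = X & UINT64_C(0xfffffffffffff800);  // drop the 11 low bits
    uint64_t Or  = And | UINT64_C(0x800);             // sticky round-to-odd bit
    Sel2 = (X & UINT64_C(0x7ff)) != 0 ? Or : X;       // only if bits were lost
  }
  double Hi = (double)(uint32_t)(Sel2 >> 32) * 4294967296.0; // times 2^32
  double Lo = (double)(uint32_t)Sel2;
  return (float)(Hi + Lo);                            // single final rounding
}

int main() {
  // Spot-check against the compiler's own conversion (assumes the host
  // rounds uint64 -> float correctly, as x86-64 does).
  uint64_t Samples[] = { 0, UINT64_C(1) << 53, UINT64_C(0xffffffffffffffff) };
  for (uint64_t X : Samples)
    printf("%llu -> %a vs %a\n", (unsigned long long)X,
           Uint64ToFloat(X), (float)X);
  return 0;
}
```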
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 548454c..8b382bc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2314,13 +2314,29 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
   return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
 }
 
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unknown FP format");
+  case MVT::f32:     return &APFloat::IEEEsingle;
+  case MVT::f64:     return &APFloat::IEEEdouble;
+  case MVT::f80:     return &APFloat::x87DoubleExtended;
+  case MVT::f128:    return &APFloat::IEEEquad;
+  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+  }
+}
+
 SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
   SDValue Op = N->getOperand(0);
   EVT SrcVT = Op.getValueType();
   EVT DstVT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();
 
-  if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is >= than the number of bits in SrcVT -1.
+  const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+  if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+      TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
     // Do a signed conversion then adjust the result.
     SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
     SignedConv = TLI.LowerOperation(SignedConv, DAG);
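Editor's note: the guard added above can be read as a one-line rule — "convert unsigned as signed, then adjust" is only legal when the destination significand has at least SrcBits-1 bits, so every source value survives the conversion exactly (the sign bit is what the adjustment repairs). A hedged sketch with the APFloat precisions hard-coded:

```cpp
#include <cassert>

// True when the signed-convert-then-adjust trick is exact for the given
// source width and destination significand precision (in bits, including
// the implicit leading one, matching APFloat::semanticsPrecision).
bool signedConvTrickIsSafe(unsigned SrcBits, unsigned DstPrecision) {
  return DstPrecision >= SrcBits - 1;
}

int main() {
  assert(signedConvTrickIsSafe(32, 53));   // i32 -> f64 (IEEEdouble: 53)
  assert(!signedConvTrickIsSafe(32, 24));  // i32 -> f32 (IEEEsingle: 24)
  assert(!signedConvTrickIsSafe(64, 53));  // i64 -> f64 would lose low bits
  return 0;
}
```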
/// -class VISIBILITY_HIDDEN DAGTypeLegalizer { +class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { const TargetLowering &TLI; SelectionDAG &DAG; public: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index b92a672..56f5ded 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -30,7 +30,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/Statistic.h" #include <climits> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index da02850..820ba66 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,7 +24,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -53,6 +52,12 @@ static RegisterScheduler "order when possible", createSourceListDAGScheduler); +static RegisterScheduler + hybridListDAGScheduler("list-hybrid", + "Bottom-up rr list scheduling which avoid stalls for " + "long latency instructions", + createHybridListDAGScheduler); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -64,6 +69,10 @@ private: /// it is top-down. bool isBottomUp; + /// NeedLatency - True if the scheduler will make use of latency information. + /// + bool NeedLatency; + /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; @@ -80,9 +89,9 @@ private: public: ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, + bool isbottomup, bool needlatency, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), + : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), AvailableQueue(availqueue), Topo(SUnits) { } @@ -161,9 +170,11 @@ private: return NewNode; } - /// ForceUnitLatencies - Return true, since register-pressure-reducing - /// scheduling doesn't need actual latency information. - bool ForceUnitLatencies() const { return true; } + /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// need actual latency information but the hybrid scheduler does. + bool ForceUnitLatencies() const { + return !NeedLatency; + } }; } // end anonymous namespace @@ -213,6 +224,12 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; + if (!ForceUnitLatencies()) { + // Updating predecessor's height. This is now the cycle when the + // predecessor can be scheduled without causing a pipeline stall. + PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); + } + // If all the node's successors are scheduled, this node is ready // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -244,10 +261,15 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. 
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); +#ifndef NDEBUG + if (CurCycle < SU->getHeight()) + DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); +#endif + + // FIXME: Handle noop hazard. SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); @@ -339,6 +361,7 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, SU->isAvailable = false; UnscheduleNodeBottomUp(OldSU); --CurCycle; + AvailableQueue->setCurCycle(CurCycle); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); @@ -386,7 +409,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -504,7 +527,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -786,7 +809,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -795,7 +818,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { NewDef = Copies.back(); } - DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, @@ -821,6 +844,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { if (CurSU) ScheduleNodeBottomUp(CurSU, CurCycle); ++CurCycle; + AvailableQueue->setCurCycle(CurCycle); } // Reverse the order if it is bottom up. @@ -889,6 +913,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { unsigned CurCycle = 0; + AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. ReleaseSuccessors(&EntrySU); @@ -911,6 +936,7 @@ void ScheduleDAGRRList::ListScheduleTopDown() { if (CurSU) ScheduleNodeTopDown(CurSU, CurCycle); ++CurCycle; + AvailableQueue->setCurCycle(CurCycle); } #ifndef NDEBUG @@ -956,6 +982,16 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + + struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; + hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; } // end anonymous namespace /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 
@@ -990,8 +1026,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { namespace { template<class SF> class RegReductionPriorityQueue : public SchedulingPriorityQueue { - PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; - unsigned currentQueueId; + std::vector<SUnit*> Queue; + SF Picker; + unsigned CurQueueId; protected: // SUnits - The SUnits for the current graph. @@ -1007,7 +1044,7 @@ namespace { public: RegReductionPriorityQueue(const TargetInstrInfo *tii, const TargetRegisterInfo *tri) - : Queue(SF(this)), currentQueueId(0), + : Picker(this), CurQueueId(0), TII(tii), TRI(tri), scheduleDAG(NULL) {} void initNodes(std::vector<SUnit> &sunits) { @@ -1067,26 +1104,26 @@ namespace { unsigned getNodeOrdering(const SUnit *SU) const { return scheduleDAG->DAG->GetOrdering(SU->getNode()); } - - unsigned size() const { return Queue.size(); } bool empty() const { return Queue.empty(); } void push(SUnit *U) { assert(!U->NodeQueueId && "Node in the queue already"); - U->NodeQueueId = ++currentQueueId; - Queue.push(U); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); } - void push_all(const std::vector<SUnit *> &Nodes) { - for (unsigned i = 0, e = Nodes.size(); i != e; ++i) - push(Nodes[i]); - } - SUnit *pop() { if (empty()) return NULL; - SUnit *V = Queue.top(); - Queue.pop(); + std::vector<SUnit *>::iterator Best = Queue.begin(); + for (std::vector<SUnit *>::iterator I = next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + Queue.pop_back(); V->NodeQueueId = 0; return V; } @@ -1094,7 +1131,11 @@ namespace { void remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - Queue.erase_one(SU); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); SU->NodeQueueId = 0; } @@ -1117,6 +1158,9 @@ namespace { typedef RegReductionPriorityQueue<src_ls_rr_sort> SrcRegReductionPriorityQueue; + + typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> + HybridBURRPriorityQueue; } /// closestSucc - Returns the scheduled cycle of the successor which is @@ -1203,7 +1247,7 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } // Source order, otherwise bottom up. -bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ +bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -1215,6 +1259,25 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return BURRSort(left, right, SPQ); } +bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ + bool LStall = left->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < left->getHeight(); + bool RStall = right->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < right->getHeight(); + // If scheduling one of the node will cause a pipeline stall, delay it. + // If scheduling either one of the node will cause a pipeline stall, sort them + // according to their height. + // If neither will cause a pipeline stall, try to reduce register pressure. 
@@ -1379,8 +1442,8 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
     // Ok, the transformation is safe and the heuristics suggest it is
     // profitable. Update the graph.
-    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
-          << " next to PredSU # " << PredSU->NodeNum
+    DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+          << " next to PredSU #" << PredSU->NodeNum
          << " to guide scheduling in the presence of multiple uses\n");
     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
       SDep Edge = PredSU->Succs[i];
@@ -1469,7 +1532,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
-          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
+          DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
                << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
           scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                         /*Reg=*/0, /*isNormalMemory=*/false,
@@ -1563,8 +1626,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {

   BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

-  ScheduleDAGRRList *SD =
-    new ScheduleDAGRRList(*IS->MF, true, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
 }
@@ -1577,8 +1639,7 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {

   TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

-  ScheduleDAGRRList *SD =
-    new ScheduleDAGRRList(*IS->MF, false, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
 }
@@ -1591,8 +1652,20 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {

   SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);

-  ScheduleDAGRRList *SD =
-    new ScheduleDAGRRList(*IS->MF, true, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
 }
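All four factories above now funnel into one ScheduleDAGRRList constructor; only the (isBottomUp, needLatency) flag pair and the priority-queue type differ. A compact sketch of that configuration table, with hypothetical Preference/Scheduler names standing in for llvm::Sched and the factory plumbing:

    #include <cstdio>

    namespace sched {
    enum Preference { Latency, RegPressure, Hybrid }; // mirrors llvm::Sched

    struct Scheduler {
      bool BottomUp;    // direction of list scheduling
      bool TrackCycles; // whether the queue needs the current cycle
    };

    // One constructor, several configurations -- the shape of the
    // createBURR/createTD/createHybrid factories in the diff.
    Scheduler Create(Preference P) {
      Scheduler S = { true, false };       // default: bottom-up, reg pressure
      switch (P) {
      case Latency:     S.BottomUp = false;    break;  // top-down list
      case RegPressure: /* keep defaults */    break;  // bottom-up RR
      case Hybrid:      S.TrackCycles = true;  break;  // bottom-up + stalls
      }
      return S;
    }
    } // namespace sched

    int main() {
      sched::Scheduler S = sched::Create(sched::Hybrid);
      std::printf("bottom-up=%d track-cycles=%d\n", S.BottomUp, S.TrackCycles);
      return 0;
    }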
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 76e4771..3185c88 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtarget.h"
 #include "llvm/ADT/DenseMap.h"
@@ -44,6 +45,24 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
   ScheduleDAG::Run(bb, insertPos);
 }

+/// NewSUnit - Creates a new SUnit and return a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+  const SUnit *Addr = 0;
+  if (!SUnits.empty())
+    Addr = &SUnits[0];
+#endif
+  SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+  assert((Addr == 0 || Addr == &SUnits[0]) &&
+         "SUnits std::vector reallocated on the fly!");
+  SUnits.back().OrigNode = &SUnits.back();
+  SUnit *SU = &SUnits.back();
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  SU->SchedulingPref = TLI.getSchedulingPreference(N);
+  return SU;
+}
+
 SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SUnit *SU = NewSUnit(Old->getNode());
   SU->OrigNode = Old->OrigNode;
@@ -52,6 +71,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
   SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+  SU->SchedulingPref = Old->SchedulingPref;
   Old->isCloned = true;
   return SU;
 }
@@ -217,9 +237,6 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
   // This is a temporary workaround.
   SUnits.reserve(NumNodes * 2);

-  // Check to see if the scheduler cares about latencies.
-  bool UnitLatencies = ForceUnitLatencies();
-
   // Add all nodes in depth first order.
   SmallVector<SDNode*, 64> Worklist;
   SmallPtrSet<SDNode*, 64> Visited;
@@ -282,10 +299,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
     N->setNodeId(NodeSUnit->NodeNum);

     // Assign the Latency field of NodeSUnit using target-provided information.
-    if (UnitLatencies)
-      NodeSUnit->Latency = 1;
-    else
-      ComputeLatency(NodeSUnit);
+    ComputeLatency(NodeSUnit);
   }
 }

@@ -353,7 +367,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
       const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                              OpSU->Latency, PhysReg);
       if (!isChain && !UnitLatencies) {
-        ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+        ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
         ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
       }

@@ -377,7 +391,17 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
 }

 void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+  // Check to see if the scheduler cares about latencies.
+  if (ForceUnitLatencies()) {
+    SU->Latency = 1;
+    return;
+  }
+
   const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty()) {
+    SU->Latency = 1;
+    return;
+  }

   // Compute the latency for the node. We use the sum of the latencies for
   // all nodes flagged together into this SUnit.
@@ -389,6 +413,37 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   }
 }

+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+                                               unsigned OpIdx, SDep& dep) const{
+  // Check to see if the scheduler cares about latencies.
+  if (ForceUnitLatencies())
+    return;
+
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty())
+    return;
+
+  if (dep.getKind() != SDep::Data)
+    return;
+
+  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+  if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    if (DefIdx >= II.getNumDefs())
+      return;
+    int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
+    if (DefCycle < 0)
+      return;
+    const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+    int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+    if (UseCycle >= 0) {
+      int Latency = DefCycle - UseCycle + 1;
+      if (Latency >= 0)
+        dep.setLatency(Latency);
+    }
+  }
+}
+
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
   if (!SU->getNode()) {
     dbgs() << "PHYS REG COPY\n";
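The new ComputeOperandLatency refines a data edge's latency from per-operand itinerary cycles: the cycle at which the def produces its result, minus the cycle at which the user reads the operand, plus one. Here is a self-contained sketch of just that arithmetic over a made-up two-class itinerary table; in the real code the cycles come from InstrItineraryData::getOperandCycle.

    #include <cassert>

    // Hypothetical per-operand cycles for two scheduling classes.
    // OperandCycle[Class][OpIdx]: cycle at which operand OpIdx is produced
    // (for defs) or consumed (for uses); -1 means "unknown".
    static const int OperandCycle[2][2] = {
      { 3, -1 },  // class 0: result 0 ready at cycle 3
      { 1,  2 },  // class 1: reads operand 0 at cycle 1, operand 1 at cycle 2
    };

    // Mirrors the diff's rule: Latency = DefCycle - UseCycle + 1, applied
    // only when both cycles are known and the result is non-negative.
    static int OperandLatency(unsigned DefClass, unsigned DefIdx,
                              unsigned UseClass, unsigned UseIdx,
                              int DefaultLatency) {
      int DefCycle = OperandCycle[DefClass][DefIdx];
      if (DefCycle < 0) return DefaultLatency;
      int UseCycle = OperandCycle[UseClass][UseIdx];
      if (UseCycle < 0) return DefaultLatency;
      int Latency = DefCycle - UseCycle + 1;
      return Latency >= 0 ? Latency : DefaultLatency;
    }

    int main() {
      // Def in class 0 feeding operand 1 of a class-1 user: 3 - 2 + 1 = 2.
      assert(OperandLatency(0, 0, 1, 1, /*DefaultLatency=*/1) == 2);
      // An unknown use cycle falls back to the edge's existing latency.
      assert(OperandLatency(0, 0, 0, 1, 1) == 1);
      return 0;
    }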
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 7ae8ec2..e8714ba 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -66,18 +66,7 @@ namespace llvm {

     /// NewSUnit - Creates a new SUnit and return a ptr to it.
     ///
-    SUnit *NewSUnit(SDNode *N) {
-#ifndef NDEBUG
-      const SUnit *Addr = 0;
-      if (!SUnits.empty())
-        Addr = &SUnits[0];
-#endif
-      SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
-      assert((Addr == 0 || Addr == &SUnits[0]) &&
-             "SUnits std::vector reallocated on the fly!");
-      SUnits.back().OrigNode = &SUnits.back();
-      return &SUnits.back();
-    }
+    SUnit *NewSUnit(SDNode *N);

     /// Clone - Creates a clone of the specified SUnit. It does not copy the
     /// predecessors / successors info nor the temporary scheduling states.
@@ -94,6 +83,15 @@ namespace llvm {
     ///
     virtual void ComputeLatency(SUnit *SU);

+    /// ComputeOperandLatency - Override dependence edge latency using
+    /// operand use/def information
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const { }
+
+    virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+                                       unsigned OpIdx, SDep& dep) const;
+
     virtual MachineBasicBlock *EmitSchedule();

     /// Schedule - Order nodes according to selected style, filling
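The assert moved out of this header guards a real invariant: the scheduler hands out raw SUnit* pointers, so the SUnits vector must never reallocate once construction starts (hence the reserve(NumNodes * 2) workaround in BuildSchedUnits). A stripped-down sketch of the same address-stability check, with a hypothetical Unit type:

    #include <cassert>
    #include <vector>

    struct Unit { unsigned Num; explicit Unit(unsigned N) : Num(N) {} };

    // push_back that asserts the buffer did not move; callers hold raw
    // pointers into Units, so a reallocation here would leave them dangling.
    static Unit *AppendStable(std::vector<Unit> &Units, unsigned Num) {
    #ifndef NDEBUG
      const Unit *Addr = Units.empty() ? 0 : &Units[0];
    #endif
      Units.push_back(Unit(Num));
      assert((Addr == 0 || Addr == &Units[0]) &&
             "Units vector reallocated on the fly!");
      return &Units.back();
    }

    int main() {
      std::vector<Unit> Units;
      Units.reserve(16);            // capacity fixed up front
      Unit *First = AppendStable(Units, 0);
      for (unsigned i = 1; i != 16; ++i)
        AppendStable(Units, i);
      assert(First == &Units[0]);   // pointers stayed valid
      return 0;
    }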
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e6df742..38bf68b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -15,6 +15,7 @@
 #include "SDNodeOrdering.h"
 #include "SDNodeDbgValue.h"
 #include "llvm/Constants.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalAlias.h"
@@ -32,6 +33,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
@@ -789,7 +791,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {

 // EntryNode could meaningfully have debug info if we can find it...
 SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
-  : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
+  : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+    FLI(fli),
     EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
     Root(getEntryNode()), Ordering(0) {
   AllNodes.push_back(&EntryNode);
@@ -963,8 +966,18 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
   EVT EltVT = VT.getScalarType();
   if (EltVT==MVT::f32)
     return getConstantFP(APFloat((float)Val), VT, isTarget);
-  else
+  else if (EltVT==MVT::f64)
     return getConstantFP(APFloat(Val), VT, isTarget);
+  else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+    bool ignored;
+    APFloat apf = APFloat(Val);
+    apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+                &ignored);
+    return getConstantFP(apf, VT, isTarget);
+  } else {
+    assert(0 && "Unsupported type in getConstantFP");
+    return SDValue();
+  }
 }

 SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
@@ -2614,7 +2627,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     }
     break;
   case ISD::AND:
-    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
     // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
     // worth handling here.
@@ -2627,7 +2641,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   case ISD::XOR:
   case ISD::ADD:
   case ISD::SUB:
-    assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
     // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
     // it's worth handling here.
@@ -2642,7 +2657,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   case ISD::SDIV:
   case ISD::SREM:
     assert(VT.isInteger() && "This operator does not apply to FP types!");
-    // fall through
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
   case ISD::FADD:
   case ISD::FSUB:
   case ISD::FMUL:
@@ -2665,6 +2682,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
         return N1;
       }
     }
+    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
     break;
@@ -3525,7 +3543,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Then check to see if we should lower the memcpy with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+    TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
                                isVol, AlwaysInline,
                                DstSV, DstSVOff, SrcSV, SrcSVOff);
   if (Result.getNode())
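The f80/f128 path in getConstantFP above works because APFloat can widen a host double into any target semantics. A hedged sketch of that conversion step in isolation, assuming LLVM headers of this vintage; APFloat::x87DoubleExtended stands in for what EVTToAPFloatSemantics(MVT::f80) returns:

    #include "llvm/ADT/APFloat.h"
    #include <cassert>

    using namespace llvm;

    // Build an extended-precision constant from a host double the way
    // getConstantFP does for f80: construct in IEEEdouble, then convert
    // to the destination semantics.
    static APFloat MakeX87Constant(double Val) {
      APFloat APF(Val);                   // IEEEdouble to start with
      bool LosesInfo;
      APFloat::opStatus S =
        APF.convert(APFloat::x87DoubleExtended,
                    APFloat::rmNearestTiesToEven, &LosesInfo);
      // Widening double -> x87 extended never drops bits.
      assert(S == APFloat::opOK && !LosesInfo && "widening should be exact");
      (void)S;
      return APF;
    }

    int main() {
      APFloat F = MakeX87Constant(1.5);
      (void)F;
      return 0;
    }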
@@ -3590,7 +3608,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Then check to see if we should lower the memmove with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+    TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
   if (Result.getNode())
     return Result;
@@ -3641,7 +3659,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Then check to see if we should lower the memset with target-specific
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
-    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+    TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                DstSV, DstSVOff);
   if (Result.getNode())
     return Result;
@@ -5417,6 +5435,8 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
     sys::SmartScopedLock<true> Lock(*VTMutex);
     return &(*EVTs->insert(VT).first);
   } else {
+    assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+           "Value type out of range!");
     return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
   }
 }
@@ -5607,6 +5627,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::LSDAADDR: return "LSDAADDR";
   case ISD::EHSELECTION: return "EHSELECTION";
   case ISD::EH_RETURN: return "EH_RETURN";
+  case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+  case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
   case ISD::ConstantPool: return "ConstantPool";
   case ISD::ExternalSymbol: return "ExternalSymbol";
   case ISD::BlockAddress: return "BlockAddress";
@@ -6008,6 +6030,21 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {

   if (getNodeId() != -1)
     OS << " [ID=" << getNodeId() << ']';
+
+  DebugLoc dl = getDebugLoc();
+  if (G && !dl.isUnknown()) {
+    DIScope
+      Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+    OS << " dbg:";
+    // Omit the directory, since it's usually long and uninteresting.
+    if (Scope.Verify())
+      OS << Scope.getFilename();
+    else
+      OS << "<unknown>";
+    OS << ':' << dl.getLine();
+    if (dl.getCol() != 0)
+      OS << ':' << dl.getCol();
+  }
 }

 void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38b204..fbe601f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3726,6 +3726,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
   return true;
 }

+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
 /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
 /// we want to emit this as a call to a named external function, return the name
 /// otherwise lower it and return null.
@@ -3818,7 +3824,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
-    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+    if (!DIVariable(DI.getVariable()).Verify())
       return 0;

     MDNode *Variable = DI.getVariable();
@@ -3881,7 +3887,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::dbg_value: {
     const DbgValueInst &DI = cast<DbgValueInst>(I);
-    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+    if (!DIVariable(DI.getVariable()).Verify())
       return 0;

     MDNode *Variable = DI.getVariable();
@@ -3900,6 +3906,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, 0, false);
     } else {
+      bool createUndef = false;
+      // FIXME : Why not use getValue() directly ?
       SDValue &N = NodeMap[V];
       if (N.getNode()) {
         if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
@@ -3907,7 +3915,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                 N.getResNo(), Offset, dl, SDNodeOrder);
           DAG.AddDbgValue(SDV, N.getNode(), false);
         }
-      } else {
+      } else if (isa<PHINode>(V) && !V->use_empty()) {
+        SDValue N = getValue(V);
+        if (N.getNode()) {
+          if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+            SDV = DAG.getDbgValue(Variable, N.getNode(),
+                                  N.getResNo(), Offset, dl, SDNodeOrder);
+            DAG.AddDbgValue(SDV, N.getNode(), false);
+          }
+        } else
+          createUndef = true;
+      } else
+        createUndef = true;
+      if (createUndef) {
         // We may expand this to cover more cases. One case where we have no
         // data available is an unreferenced parameter; we need this fallback.
         SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
@@ -4018,6 +4038,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     MMI.setCurrentCallSite(CI->getZExtValue());
     return 0;
   }
+  case Intrinsic::eh_sjlj_setjmp: {
+    setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_longjmp: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+                            getRoot(),
+                            getValue(I.getOperand(1))));
+    return 0;
+  }

   case Intrinsic::convertff:
   case Intrinsic::convertfsi:
@@ -4924,7 +4955,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
 namespace llvm {
 /// AsmOperandInfo - This contains information for each constraint that we are
 /// lowering.
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
     public TargetLowering::AsmOperandInfo {
 public:
   /// CallOperand - If this is the result output operand or a clobber
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 422cb7a..65b8d4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -25,9 +25,11 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -131,11 +133,13 @@ namespace llvm {
     if (OptLevel == CodeGenOpt::None)
       return createFastDAGScheduler(IS, OptLevel);
-    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+    if (TLI.getSchedulingPreference() == Sched::Latency)
       return createTDListDAGScheduler(IS, OptLevel);
-    assert(TLI.getSchedulingPreference() ==
-         TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
-    return createBURRListDAGScheduler(IS, OptLevel);
+    if (TLI.getSchedulingPreference() == Sched::RegPressure)
+      return createBURRListDAGScheduler(IS, OptLevel);
+    assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+           "Unknown sched type!");
+    return createHybridListDAGScheduler(IS, OptLevel);
   }
 }

@@ -188,6 +192,39 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }

+/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
+/// other function that gcc recognizes as "returning twice". This is used to
+/// limit code-gen optimizations on the machine function.
+///
+/// FIXME: Remove after <rdar://problem/8031714> is fixed.
+static bool FunctionCallsSetJmp(const Function *F) {
+  const Module *M = F->getParent();
+  static const char *ReturnsTwiceFns[] = {
+    "setjmp",
+    "sigsetjmp",
+    "setjmp_syscall",
+    "savectx",
+    "qsetjmp",
+    "vfork",
+    "getcontext"
+  };
+#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
+
+  for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
+    if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+      if (!Callee->use_empty())
+        for (Value::const_use_iterator
+               I = Callee->use_begin(), E = Callee->use_end();
+             I != E; ++I)
+          if (const CallInst *CI = dyn_cast<CallInst>(I))
+            if (CI->getParent()->getParent() == F)
+              return true;
+    }
+
+  return false;
+#undef NUM_RETURNS_TWICE_FNS
+}
+
 bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   // Do some sanity-checking on the command-line options.
   assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -218,6 +255,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   MachineBasicBlock *EntryMBB = MF->begin();
   RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);

+  DenseMap<unsigned, unsigned> LiveInMap;
+  if (!FuncInfo->ArgDbgValues.empty())
+    for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+           E = RegInfo->livein_end(); LI != E; ++LI)
+      if (LI->second)
+        LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
   // Insert DBG_VALUE instructions for function arguments to the entry block.
   for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
     MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
@@ -230,8 +274,44 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
       // FIXME: VR def may not be in entry block.
       Def->getParent()->insert(llvm::next(InsertPos), MI);
     }
+
+    // If Reg is live-in then update debug info to track its copy in a vreg.
+    DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+    if (LDI != LiveInMap.end()) {
+      MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+      MachineBasicBlock::iterator InsertPos = Def;
+      const MDNode *Variable =
+        MI->getOperand(MI->getNumOperands()-1).getMetadata();
+      unsigned Offset = MI->getOperand(1).getImm();
+      // Def is never a terminator here, so it is ok to increment InsertPos.
+      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(LDI->second, RegState::Debug)
+        .addImm(Offset).addMetadata(Variable);
+    }
+  }
+
+  // Determine if there are any calls in this machine function.
+  MachineFrameInfo *MFI = MF->getFrameInfo();
+  if (!MFI->hasCalls()) {
+    for (MachineFunction::const_iterator
+           I = MF->begin(), E = MF->end(); I != E; ++I) {
+      const MachineBasicBlock *MBB = I;
+      for (MachineBasicBlock::const_iterator
+             II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+        const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+        if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+          MFI->setHasCalls(true);
+          goto done;
+        }
+      }
+    }
+  done:;
   }

+  // Determine if there is a call to setjmp in the machine function.
+  MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
   FuncInfo->clear();
@@ -662,6 +742,7 @@ void SelectionDAGISel::DoInstructionSelection() {
     CurDAG->setRoot(Dummy.getValue());
   }

+  DEBUG(errs() << "===== Instruction selection ends:\n");
   PostprocessISelDAG();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8a4a1b1..44a80d3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -544,7 +543,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   ExceptionPointerRegister = 0;
   ExceptionSelectorRegister = 0;
   BooleanContents = UndefinedBooleanContent;
-  SchedPreferenceInfo = SchedulingForLatency;
+  SchedPreferenceInfo = Sched::Latency;
   JumpBufSize = 0;
   JumpBufAlignment = 0;
   IfCvtBlockSizeLimit = 2;
@@ -2417,7 +2416,7 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
 getRegForInlineAsmConstraint(const std::string &Constraint,
                              EVT VT) const {
   if (Constraint[0] != '{')
-    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+    return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
   assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");

   // Remove the braces from around the name.
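getRegForInlineAsmConstraint only looks at register constraints of the {regname} form; everything between stripping the braces and the make_pair changes above and below is name matching. A small sketch of the brace handling alone, in plain std::string terms (StripConstraintBraces is a hypothetical helper, not the LLVM API):

    #include <cassert>
    #include <string>

    // Return the register name inside a brace-enclosed inline-asm
    // constraint, or the empty string when it is not of the {name} form.
    static std::string StripConstraintBraces(const std::string &Constraint) {
      if (Constraint.size() < 3 || Constraint[0] != '{' ||
          *(Constraint.end() - 1) != '}')
        return std::string();
      // Remove the braces from around the name, as the code's comment says.
      return Constraint.substr(1, Constraint.size() - 2);
    }

    int main() {
      assert(StripConstraintBraces("{r11}") == "r11");
      assert(StripConstraintBraces("r11").empty()); // not brace-enclosed
      return 0;
    }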
@@ -2449,7 +2448,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
     }
   }

-  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+  return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
 }

 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index d20477f..a081e3c 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -12,9 +12,11 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;

-TargetSelectionDAGInfo::TargetSelectionDAGInfo() {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+  : TD(TM.getTargetData()) {
 }

 TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {